In [1]:
import pandas as pd

In [2]:
import pandas as pd
import numpy as np

data = {
    'Name': ['Mayank', 'Nilesh', 'Taniya', 'Suhani', 'Rakesh'],
    'Age': [20, np.nan, 20, 19, 46],
    'City': ['Dwarka', 'Gurugram', 'Delhi', 'Chennai', 'Farrukhnagar'],
    'Salary': [90000, 80000, np.nan, 60000, 75000]
}

df = pd.DataFrame(data)

# 2. Basic DataFrame Operations
print("DataFrame:")
print(df)
print("\nFirst 3 rows:")
print(df.head(3))
print("\nDataFrame info:")
print(df.info())
print("\nDescriptive statistics:")
print(df.describe())

# 3. Indexing and Selection
print("\nSelecting 'Name' and 'City':")
print(df[['Name', 'City']])

# 4. Data Cleaning
print("\nDropping rows with missing values:")
df_cleaned = df.dropna()
print(df_cleaned)
print("\nFilling missing 'Age' with mean:")
df['Age'].fillna(df['Age'].mean(), inplace=True)
print(df)

# 5. Data Transformation
print("\nSorting by 'Salary':")
df_sorted = df.sort_values(by='Salary')
print(df_sorted)
print("\nGrouping by 'City' and calculating mean salary:")
print(df.groupby('City')['Salary'].mean())

# 6. Adding/Removing Columns
df['Experience'] = [2, 3, 1, 5, 4]  # Adding a new column
print("\nDataFrame after adding 'Experience':")
print(df)
df.drop(columns=['Experience'], inplace=True)  # Removing the 'Experience' column
print("\nDataFrame after dropping 'Experience':")
print(df)

# 7. Filtering and Boolean Indexing
print("\nFiltering for Age > 25:")
filtered_df = df[df['Age'] > 25]
print(filtered_df)

# 8. Merging and Joining
data2 = {
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
    'State': ['NY', 'CA', 'IL', 'TX', 'AZ']
}
df2 = pd.DataFrame(data2)
print("\nMerging DataFrames on 'City':")
merged_df = pd.merge(df, df2, on='City')
print(merged_df)

# 9. Time Series
date_range = pd.date_range(start='2024-01-01', periods=5)
time_df = pd.DataFrame({'Date': date_range, 'Value': [10, 20, 30, 40, 50]})
time_df.set_index('Date', inplace=True)
print("\nTime Series DataFrame:")
print(time_df)
print("\nResampling to get monthly sum:")
print(time_df.resample('M').sum())

# 10. Exporting Data
df.to_csv('output.csv', index=False)
print("\nDataFrame exported to 'output.csv'.")



DataFrame:
     Name   Age          City   Salary
0  Mayank  20.0        Dwarka  90000.0
1  Nilesh   NaN      Gurugram  80000.0
2  Taniya  20.0         Delhi      NaN
3  Suhani  19.0       Chennai  60000.0
4  Rakesh  46.0  Farrukhnagar  75000.0

First 3 rows:
     Name   Age      City   Salary
0  Mayank  20.0    Dwarka  90000.0
1  Nilesh   NaN  Gurugram  80000.0
2  Taniya  20.0     Delhi      NaN

DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    5 non-null      object 
 1   Age     4 non-null      float64
 2   City    5 non-null      object 
 3   Salary  4 non-null      float64
dtypes: float64(2), object(2)
memory usage: 292.0+ bytes
None

Descriptive statistics:
             Age   Salary
count   4.000000      4.0
mean   26.250000  76250.0
std    13.175103  12500.0
min    19.000000  60000.0
25%    19.750000  71250.0
50%    20.000000  

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mean(), inplace=True)
  print(time_df.resample('M').sum())
