# Pandas Class Notebook: Data Manipulation & Cleaning


## 1️⃣ DataFrame Indexing and Selection

In [None]:
import pandas as pd

# Sample Data
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [25, 30, 35, 40, 45],
    'City': ['Delhi', 'Mumbai', 'Bangalore', 'Hyderabad', 'Chennai']
}
df = pd.DataFrame(data)
df

### Select rows and columns

In [None]:
# Selecting column
df['Name']

# Selecting multiple columns
df[['Name', 'City']]

# Selecting rows by index position
df.iloc[0:3]

# Selecting rows by label
df.loc[0:2, ['Name', 'Age']]

## 2️⃣ Filtering and Querying Data

In [None]:
# Filter rows based on condition
df[df['Age'] > 30]

# Using query()
df.query('Age < 40 and City == "Mumbai"')

## 3️⃣ Excel Data Cleaning: Handling Merged Cells, Formatting Issues

In [None]:
# Simulating a dataset with merged or missing values
dirty_data = pd.DataFrame({
    'Name': ['Alice', None, 'Charlie', 'David', None],
    'Age': [25, 25, 35, None, 45],
    'City': ['Delhi', 'Delhi', None, 'Hyderabad', 'Hyderabad']
})
dirty_data

### Handling merged/missing cells

In [None]:
# Forward fill to handle merged-like cells
cleaned = dirty_data.fillna(method='ffill')
cleaned

## 4️⃣ Data Type Conversions

In [None]:
# Example: convert column data type
df['Age'] = df['Age'].astype(float)
df.dtypes

## 5️⃣ Basic Data Manipulation (rename, drop, fillna)

In [None]:
# Rename column
df.rename(columns={'City': 'Location'}, inplace=True)

# Drop a column
df = df.drop(columns=['Age'])

# Fill missing data
df.fillna('Unknown', inplace=True)
df

## 6️⃣ Advanced Data Manipulation (merge, join, concat)

In [None]:
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'ID': [1, 2, 3], 'Score': [85, 90, 95]})

# Merge on ID
merged = pd.merge(df1, df2, on='ID')
merged

# Concat example
concat_df = pd.concat([df1, df2], axis=0, ignore_index=True)
concat_df

## 7️⃣ GroupBy Operations and Aggregations

In [1]:
sales = pd.DataFrame({
    'City': ['Delhi', 'Delhi', 'Mumbai', 'Mumbai', 'Chennai'],
    'Sales': [100, 150, 200, 250, 300]
})
sales.groupby('City')['Sales'].sum()

NameError: name 'pd' is not defined

## 8️⃣ Pivot Tables (Excel-style operations)

In [None]:
pivot = sales.pivot_table(values='Sales', index='City', aggfunc='sum')
pivot

## 9️⃣ Writing Data Back to Excel Files

In [None]:
# Writing cleaned or final data to Excel
pivot.to_excel('final_output.xlsx', index=True)
print('Excel file saved successfully!')

In [3]:
import pandas as pd
# Example DataFrames
sales = pd.DataFrame({
    'Product': ['Laptop', 'Mouse', 'Keyboard'],
    'Sales': [1200, 300, 450]
})

customers = pd.DataFrame({
    'Customer': ['Alice', 'Bob', 'Charlie'],
    'City': ['Delhi', 'Mumbai', 'Pune']
})

# Use ExcelWriter to save both DataFrames in one file
with pd.ExcelWriter('Company_Data.xlsx') as writer:
    sales.to_excel(writer, sheet_name='Sales_Data', index=False)
    customers.to_excel(writer, sheet_name='Customer_Data', index=False)

print(" Excel file created successfully with two sheets!")


 Excel file created successfully with two sheets!
