<a href="https://colab.research.google.com/github/SShresth7272/Python-Code/blob/main/Pandas_Practice_sheet0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# =============================================================================
# 1. CREATING DATAFRAMES FROM DICTIONARIES
# =============================================================================

print("=" * 60, "1. CREATING DATAFRAMES FROM DICTIONARIES", "=" * 60, sep="\n")

# Column-oriented input: every key names a column and maps to that column's
# values, so all lists must have the same length.
data1 = dict(
    Name=['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    Age=[25, 30, 35, 28, 32],
    City=['New York', 'London', 'Paris', 'Tokyo', 'Sydney'],
    Salary=[50000, 60000, 70000, 55000, 65000],
)

df1 = pd.DataFrame(data=data1)
print("DataFrame from dictionary (keys as columns):", df1, "", sep="\n")

# Row-oriented input: every key names a row label and maps to that row's
# values; the column names have to be supplied separately.
data2 = dict(
    Row1=['Alice', 25, 'New York', 50000],
    Row2=['Bob', 30, 'London', 60000],
    Row3=['Charlie', 35, 'Paris', 70000],
)

df2 = pd.DataFrame.from_dict(
    data2,
    orient='index',
    columns=['Name', 'Age', 'City', 'Salary'],
)
print("DataFrame from dictionary (keys as rows):", df2, "", sep="\n")

# =============================================================================
# 2. ACCESSING AND MODIFYING DATA
# =============================================================================

print("=" * 60, "2. ACCESSING AND MODIFYING DATA", "=" * 60, sep="\n")

# A single column label yields a Series
print("Accessing 'Name' column:", df1['Name'], "", sep="\n")

# A list of column labels yields a DataFrame restricted to those columns
print("Accessing multiple columns:", df1.loc[:, ['Name', 'Salary']], "", sep="\n")

# Assigning to a label that does not exist yet appends a new column
df1['Bonus'] = df1['Salary'].mul(0.1)  # 10% bonus
print("DataFrame with new 'Bonus' column:", df1, "", sep="\n")

# Assigning to an existing label overwrites that column
df1['Age'] = df1['Age'].add(1)  # Increase all ages by 1
print("DataFrame with modified 'Age' column:", df1, "", sep="\n")

# =============================================================================
# 3. FILTERING AND SELECTION
# =============================================================================

print("=" * 60, "3. FILTERING AND SELECTION", "=" * 60, sep="\n")

# Boolean mask: keep only the rows where the comparison is True
high_salary = df1.loc[df1['Salary'].gt(60000)]
print("Employees with salary > 60000:", high_salary, "", sep="\n")

# Two masks combined element-wise with & (both must hold)
young_high_earners = df1.loc[df1['Age'].lt(30) & df1['Salary'].gt(50000)]
print("Young employees with salary > 50000:", young_high_earners, "", sep="\n")

# The same kind of filter written as a query-string expression
london_employees = df1.query("City == 'London'")
print("Employees from London:", london_employees, "", sep="\n")

# =============================================================================
# 4. DATA ANALYSIS OPERATIONS
# =============================================================================

print("=" * 60, "4. DATA ANALYSIS OPERATIONS", "=" * 60, sep="\n")

# Summary statistics for the DataFrame
print("Basic statistics:", df1.describe(), "", sep="\n")

# Individual aggregates on single columns
print(
    f"Average salary: ${df1['Salary'].mean():.2f}",
    f"Total salary: ${df1['Salary'].sum():.2f}",
    f"Maximum age: {df1['Age'].max()}",
    "",
    sep="\n",
)

# Split the salaries by city, then average each group
city_stats = df1['Salary'].groupby(df1['City']).mean()
print("Average salary by city:", city_stats, "", sep="\n")

# =============================================================================
# 5. HANDLING MISSING DATA
# =============================================================================

print("=" * 60, "5. HANDLING MISSING DATA", "=" * 60, sep="\n")

# Sample data in which every column has exactly one missing entry
data_with_na = dict(
    Name=['Alice', 'Bob', 'Charlie', None, 'Eve'],
    Age=[25, None, 35, 28, 32],
    City=['New York', 'London', None, 'Tokyo', 'Sydney'],
    Salary=[50000, 60000, None, 55000, 65000],
)

df_na = pd.DataFrame(data=data_with_na)
print("DataFrame with missing values:", df_na, "", sep="\n")

# Count the missing entries per column
print("Missing values count:", df_na.isna().sum(), "", sep="\n")

# Per-column replacements: a placeholder for the text columns and the
# column mean (computed over the non-missing entries) for the numeric ones
df_filled = df_na.fillna(
    value=dict(
        Name='Unknown',
        Age=df_na['Age'].mean(),
        City='Unknown',
        Salary=df_na['Salary'].mean(),
    )
)
print("DataFrame after filling missing values:", df_filled, "", sep="\n")

# =============================================================================
# 6. SORTING AND RANKING
# =============================================================================

print("=" * 60, "6. SORTING AND RANKING", "=" * 60, sep="\n")

# One sort key, largest salary first
sorted_by_salary = df1.sort_values(by='Salary', ascending=False)
print("Sorted by salary (descending):", sorted_by_salary, "", sep="\n")

# Two sort keys with a separate direction for each: city A-Z, then salary
# from high to low within the same city
sorted_multi = df1.sort_values(by=['City', 'Salary'], ascending=[True, False])
print(
    "Sorted by city (ascending) and salary (descending):",
    sorted_multi,
    "",
    sep="\n",
)

# =============================================================================
# 7. ADVANCED OPERATIONS
# =============================================================================

print("=" * 60, "7. ADVANCED OPERATIONS", "=" * 60, sep="\n")

# Derived columns: the sum of two existing columns, and a label chosen
# element-wise from a salary threshold
df1['Total_Compensation'] = df1['Salary'].add(df1['Bonus'])
df1['Salary_Category'] = np.where(df1['Salary'].gt(60000), 'High', 'Medium')
print("DataFrame with calculated columns:", df1, "", sep="\n")

# Pivot table: one row per city holding the mean salary
pivot_table = pd.pivot_table(df1, values='Salary', index='City', aggfunc='mean')
print("Pivot table - Average salary by city:", pivot_table, "", sep="\n")

# =============================================================================
# 8. EXPORTING DATA
# =============================================================================

print("=" * 60, "8. EXPORTING DATA", "=" * 60, sep="\n")

# The export calls below are intentionally left commented out so that running
# this script does not write any files:
# df1.to_csv('employee_data.csv', index=False)
# df1.to_excel('employee_data.xlsx', index=False)
# df1.to_json('employee_data.json')

print(
    "Data can be exported to CSV, Excel, JSON formats",
    "Example: df1.to_csv('employee_data.csv', index=False)",
    sep="\n",
)

# =============================================================================
# 9. CONVERTING BACK TO DICTIONARY
# =============================================================================

print("=" * 60, "9. CONVERTING BACK TO DICTIONARY", "=" * 60, sep="\n")

# Default orientation: {column -> {index label -> value}}
dict_from_df = df1.to_dict(orient='dict')
print("DataFrame converted back to dictionary:", dict_from_df, "", sep="\n")

# 'records' orientation: one {column -> value} dict per row
dict_records = df1.to_dict(orient='records')
print("DataFrame as list of records:")
for record in dict_records:
    print(record)

# Closing banner, separated from the records by one blank line
print()
print("=" * 60, "PROGRAM COMPLETED SUCCESSFULLY!", "=" * 60, sep="\n")

1. CREATING DATAFRAMES FROM DICTIONARIES
DataFrame from dictionary (keys as columns):
      Name  Age      City  Salary
0    Alice   25  New York   50000
1      Bob   30    London   60000
2  Charlie   35     Paris   70000
3    Diana   28     Tokyo   55000
4      Eve   32    Sydney   65000

DataFrame from dictionary (keys as rows):
         Name  Age      City  Salary
Row1    Alice   25  New York   50000
Row2      Bob   30    London   60000
Row3  Charlie   35     Paris   70000

2. ACCESSING AND MODIFYING DATA
Accessing 'Name' column:
0      Alice
1        Bob
2    Charlie
3      Diana
4        Eve
Name: Name, dtype: object

Accessing multiple columns:
      Name  Salary
0    Alice   50000
1      Bob   60000
2  Charlie   70000
3    Diana   55000
4      Eve   65000

DataFrame with new 'Bonus' column:
      Name  Age      City  Salary   Bonus
0    Alice   25  New York   50000  5000.0
1      Bob   30    London   60000  6000.0
2  Charlie   35     Paris   70000  7000.0
3    Diana   28     Tok