In [1]:
# Getting Started with Pandas
# Objective: Introduce students to using Pandas for data analysis by loading data into Pandas
# DataFrames.

# Question 1: Importing Pandas and Loading a CSV File
# 2. Import the pandas library.
# 3. Load a CSV file into a DataFrame.
# Step 2: Import the pandas library
import pandas as pd
import os
def load_csv_to_dataframe(filename="sample_data.csv"):
    """Load a CSV file into a pandas DataFrame, creating a sample file if missing.

    If ``filename`` does not exist, a small three-row sample CSV
    (columns Name, Age, City) is written to that path first, so the
    function can be run without any pre-existing data.

    Parameters
    ----------
    filename : str, optional
        Path of the CSV file to read. Defaults to ``"sample_data.csv"`` in
        the current working directory, which is what a call with no
        arguments uses.

    Returns
    -------
    pandas.DataFrame or None
        The loaded DataFrame, or ``None`` if reading the file failed (the
        error is printed rather than raised).
    """
    sample_data = """Name,Age,City
Alice,28,New York
Bob,34,Los Angeles
Charlie,22,Chicago"""
    if not os.path.exists(filename):
        # Write with an explicit encoding: read_csv decodes as UTF-8 by
        # default, so the file should not depend on the platform locale.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(sample_data)
        print(f"Created sample CSV file: '{filename}'")
    try:
        df = pd.read_csv(filename)
        print(f"Successfully loaded '{filename}'")
        print("\nDataFrame preview:")
        print(df.head())
        return df
    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with
        # None so the calling cell keeps running.
        print(f"Unexpected error: {e}")
        return None
# Run the loader; `dataframe` holds the loaded DataFrame, or None if reading failed.
dataframe = load_csv_to_dataframe()

Created sample CSV file: 'sample_data.csv'
Successfully loaded 'sample_data.csv'

DataFrame preview:
      Name  Age         City
0    Alice   28     New York
1      Bob   34  Los Angeles
2  Charlie   22      Chicago


In [2]:
# corrected Data Cleaning & Manipulation
# Objective: Practice cleaning data and manipulating DataFrames.

# Question 1: Handling Missing Values
# 9. Use the fillna() method to fill missing values with a specific value.


# Question 2: Renaming Columns
# 10. Change the names of specific columns using rename().

# Question 3: Dropping Duplicates
# 11. Remove duplicate rows from the DataFrame.

import pandas as pd
# Sample records: the two 'Bob' rows are identical and both have no age,
# so the same frame demonstrates missing values and duplicate rows.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'Bob'],
    Age=[25, None, 35, None],
    Salary=[50000, 60000, 70000, 60000],
)
df = pd.DataFrame(data)


def _show(heading, frame):
    """Print a heading line followed by the frame's text representation."""
    print(heading)
    print(frame)


# Question 1: Handling Missing Values
# 9. fillna() with a per-column mapping replaces missing 'Age' values with 0.
_fill_values = {'Age': 0}
df_filled = df.fillna(value=_fill_values)
_show("After filling missing values:", df_filled)

# Question 2: Renaming Columns
# 10. rename() maps old column labels to new ones; 'Age' is left unchanged.
_new_column_names = {
    'Name': 'Employee Name',
    'Salary': 'Monthly Salary',
}
df_renamed = df.rename(columns=_new_column_names)
_show("\nAfter renaming columns:", df_renamed)

# Question 3: Dropping Duplicates
# 11. drop_duplicates() keeps the first occurrence of each repeated row.
df_no_duplicates = df.drop_duplicates(keep='first')
_show("\nAfter dropping duplicates:", df_no_duplicates)






After filling missing values:
      Name   Age  Salary
0    Alice  25.0   50000
1      Bob   0.0   60000
2  Charlie  35.0   70000
3      Bob   0.0   60000

After renaming columns:
  Employee Name   Age  Monthly Salary
0         Alice  25.0           50000
1           Bob   NaN           60000
2       Charlie  35.0           70000
3           Bob   NaN           60000

After dropping duplicates:
      Name   Age  Salary
0    Alice  25.0   50000
1      Bob   NaN   60000
2  Charlie  35.0   70000


In [3]:
#  corrected Data Aggregation & Exporting
# Objective: Aggregate data and export the results.

# Question 1: Grouping and Aggregating Data
# 12. Group data by a specific column and calculate the mean for each group.

# Question 2: Exporting Data to CSV
# 13. Export the DataFrame to a new CSV file.

# Question 3: Aggregating with Multiple Functions
# 14. Apply several aggregate functions to the grouped data.

import pandas as pd
# Sample employee records spread over three departments.
data = dict(
    Department=['Sales', 'HR', 'Sales', 'HR', 'IT'],
    Employee=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Salary=[50000, 60000, 55000, 62000, 70000],
    Bonus=[5000, 4000, 4500, 4800, 5200],
)
df = pd.DataFrame(data)

# Both aggregations below use the same grouping and the same numeric columns.
_numeric_columns = ['Salary', 'Bonus']
_by_department = df.groupby('Department')[_numeric_columns]

# Question 1: Grouping and Aggregating Data
# 12. Group data by a specific column and calculate the mean for each group.
grouped_mean = _by_department.mean()
print("Mean Salary and Bonus by Department:", grouped_mean, sep="\n")

# Question 2: Exporting Data to CSV
# 13. Export the DataFrame to a new CSV file (row index is not written).
df.to_csv('employees_export.csv', index=False)
print("\nData exported to 'employees_export.csv'.")

# Question 3: Aggregating with Multiple Functions
# 14. Apply several aggregate functions to the grouped data.
grouped_multiple = _by_department.agg(['mean', 'max', 'min'])
print(
    "\nAggregated statistics by Department (mean, max, min):",
    grouped_multiple,
    sep="\n",
)





Mean Salary and Bonus by Department:
             Salary   Bonus
Department                 
HR          61000.0  4400.0
IT          70000.0  5200.0
Sales       52500.0  4750.0

Data exported to 'employees_export.csv'.

Aggregated statistics by Department (mean, max, min):
             Salary                 Bonus            
               mean    max    min    mean   max   min
Department                                           
HR          61000.0  62000  60000  4400.0  4800  4000
IT          70000.0  70000  70000  5200.0  5200  5200
Sales       52500.0  55000  50000  4750.0  5000  4500
