In [None]:
# Getting Started with Pandas
# Objective: Introduce students to using Pandas for data analysis by loading data into Pandas
# DataFrames.

# Question 1: Importing Pandas and Loading a CSV File
# 1. Open your Jupyter Notebook or a Python environment.
# 2. Import the pandas library.
# 3. Load a CSV file into a DataFrame.




# Question 2: Displaying the First Few Rows
# 4. Use the head() method to display the first five rows of the DataFrame.





# Question 3: Basic Data Information
# 5. Use the info() method to get a concise summary of the DataFrame.
import pandas as pd
import os
def load_csv_to_dataframe(filename="sample_data.csv"):
    """Load a CSV file into a DataFrame, creating a sample file first if needed.

    If ``filename`` does not exist, a three-row sample CSV (Name, Age, City)
    is written to that path before loading. After a successful load the
    function prints a preview (``head()``) and a concise summary (``info()``).

    Args:
        filename: Path of the CSV file to load. Defaults to
            ``"sample_data.csv"`` in the current working directory.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` if the file could not be
        created or read (the error is printed rather than raised).
    """
    sample_data = """Name,Age,City
Alice,28,New York
Bob,34,Los Angeles
Charlie,22,Chicago"""
    try:
        # File creation lives inside the try so that a failed write is
        # reported the same way as a failed read (message + None).
        if not os.path.exists(filename):
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(sample_data)
            print(f"Created sample CSV file: '{filename}'")
        df = pd.read_csv(filename)
        print(f"Successfully loaded '{filename}'")
        print("\nDataFrame preview:")
        print(df.head())
        # Question 3: info() prints its summary itself, so it is not wrapped
        # in print().
        print("\nDataFrame summary:")
        df.info()
        return df
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller
        # check for None instead of aborting the notebook cell.
        print(f"Unexpected error: {e}")
        return None
# Run the loader on its default sample file; `dataframe` is None if loading failed.
dataframe = load_csv_to_dataframe()





In [None]:
# Data Inspection & Selection
# Objective: Learn how to inspect data and select specific data points.

# Question 1: Inspecting Column Data Types
# 6. Use the dtypes attribute to inspect the data types of each column.




# Question 2: Selecting Columns
# 7. Select a single column from the DataFrame.





# Question 3: Slicing Rows
# 8. Select specific rows using slicing.
import pandas as pd
# Small in-memory table used for the inspection and selection exercises.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'Salary': [50000, 60000, 70000, 80000],
}
df = pd.DataFrame(data)

# 6. The dtypes attribute shows the data type of each column.
column_types = df.dtypes
print("Column Data Types:", column_types, sep="\n")

# 7. Indexing with a single column label selects that one column.
age_column = df['Age']
print("\nSelected Column - Age:", age_column, sep="\n")

# 8. Positional slice covering the first two rows.
first_two_rows = df.iloc[:2]
print("\nSliced Rows (first 2):", first_two_rows, sep="\n")





In [None]:
# Data Cleaning & Manipulation
# Objective: Practice cleaning data and manipulating DataFrames.

# Question 1: Handling Missing Values
# 9. Use the fillna() method to fill missing values with a specific value.




# Question 2: Renaming Columns
# 10. Change the names of specific columns using rename().




# Question 3: Dropping Duplicates
# 11. Remove duplicate rows from the DataFrame.
import pandas as pd
# Sample records: 'Age' has missing entries and the 'Bob' row appears twice.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Bob'],
    'Age': [25, None, 35, None],
    'Salary': [50000, 60000, 70000, 60000],
}
df = pd.DataFrame(data)

# Each step below starts from the original `df`, so the three results are
# independent of one another.

# Question 1: Handling Missing Values
# 9. fillna() with a per-column mapping: missing 'Age' entries become 0.
age_defaults = {'Age': 0}
df_filled = df.fillna(value=age_defaults)
print("After filling missing values:", df_filled, sep="\n")

# Question 2: Renaming Columns
# 10. rename() with an old-name -> new-name mapping for the chosen columns.
new_column_names = {'Name': 'Employee Name', 'Salary': 'Monthly Salary'}
df_renamed = df.rename(columns=new_column_names)
print("\nAfter renaming columns:", df_renamed, sep="\n")

# Question 3: Dropping Duplicates
# 11. drop_duplicates() keeps the first occurrence of each repeated row.
df_no_duplicates = df.drop_duplicates(keep='first')
print("\nAfter dropping duplicates:", df_no_duplicates, sep="\n")






In [None]:
# Data Aggregation & Exporting
# Objective: Aggregate data and export the results.

# Question 1: Grouping and Aggregating Data
# 12. Group data by a specific column and calculate the mean for each group.





# Question 2: Exporting Data to CSV
# 13. Export the DataFrame to a new CSV file.





# Question 3: Aggregating with Multiple Functions
# 14. Apply several aggregate functions to the grouped data.

import pandas as pd
# Employee records used for the grouping and export exercises.
data = {
    'Department': ['Sales', 'HR', 'Sales', 'HR', 'IT'],
    'Employee': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    'Salary': [50000, 60000, 55000, 62000, 70000],
    'Bonus': [5000, 4000, 4500, 4800, 5200],
}
df = pd.DataFrame(data)

# Both aggregations below group the same two pay columns by department,
# so the grouped selection is built once and reused.
pay_columns = ['Salary', 'Bonus']
pay_by_department = df.groupby('Department')[pay_columns]

# Question 1: Grouping and Aggregating Data
# 12. Mean of each pay column within every department.
grouped_mean = pay_by_department.mean()
print("Mean Salary and Bonus by Department:", grouped_mean, sep="\n")

# Question 2: Exporting Data to CSV
# 13. Write the ungrouped DataFrame to disk; index=False omits the row index.
df.to_csv(path_or_buf='employees_export.csv', index=False)
print("\nData exported to 'employees_export.csv'.")

# Question 3: Aggregating with Multiple Functions
# 14. Apply several aggregate functions to each pay column per department.
summary_functions = ['mean', 'max', 'min']
grouped_multiple = pay_by_department.agg(summary_functions)
print("\nAggregated statistics by Department (mean, max, min):", grouped_multiple, sep="\n")


