#### DataFrame 1: Student Grades

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Source data: one row per student, one column per subject grade.
data1 = {
    'Student_ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Math': [85, 90, 78, 92, 88, 95, 89, 79, 83, 91],
    'English': [78, 85, 88, 80, 92, 87, 90, 84, 79, 88],
    'Science': [90, 92, 85, 88, 94, 79, 83, 91, 87, 89]
}

df1 = pd.DataFrame(data1)

# Subject columns shared by the exercises below; naming them once keeps the
# average and total calculations working on the same set of columns.
subject_cols = ['Math', 'English', 'Science']

# Exercise 1: Calculate the average grade for each student.

# Row-wise mean (axis=1) across the subject columns only
df1['Average'] = df1[subject_cols].mean(axis=1)

# Display the result
df1[['Student_ID', 'Average']]


# Exercise 2: Find the student with the highest average grade.

# idxmax() gives the index label of the first row holding the maximum average
top_student = df1.loc[df1['Average'].idxmax()]

# Pulling a single row out of a frame with int and float columns upcasts the
# whole row to float, so the ID would print as 5.000000; convert it back to int.
top_student_id = int(top_student['Student_ID'])

# Display the result
print("Student with the highest average grade:")
print(f"Student_ID: {top_student_id}")
print(f"Average: {top_student['Average']:.2f}")

# Exercise 3: Create a new column 'Total' representing the total marks obtained by each student.

# Row-wise sum of the subject columns (the 'Average' column is deliberately excluded)
df1['Total'] = df1[subject_cols].sum(axis=1)

# The result data frame
df1

# Exercise 4: Plot a bar chart to visualize the average grades in each subject.

# Select the subject columns by name rather than by position, so the chart
# stays correct even if columns are added to or reordered in df1
subjects = df1[['Math', 'English', 'Science']]

# Column-wise mean: one average grade per subject
subjects_mean = subjects.mean()

# Subject names, used as the bar labels on the x-axis
labels = subjects.columns

# Draw one bar per subject
plt.figure(figsize=(8, 5))
plt.bar(labels, subjects_mean)

# Title and axis labels so the chart can be read on its own
plt.title('Average Grade by Subject', fontsize=14, pad=20)
plt.xlabel('Subject', fontsize=12)
plt.ylabel('Average Grade', fontsize=12)

# Render the chart
plt.tight_layout()
plt.show()



#### DataFrame 2: Sales Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Source data: ten consecutive days of unit sales for three products.
data2 = {
    'Date': pd.date_range(start='2023-01-01', periods=10),
    'Product_A': [120, 150, 130, 110, 140, 160, 135, 125, 145, 155],
    'Product_B': [90, 110, 100, 80, 95, 105, 98, 88, 102, 112],
    'Product_C': [75, 80, 85, 70, 88, 92, 78, 82, 87, 90]
}

df2 = pd.DataFrame(data2)

# Product columns shared by the exercises below
product_cols = ['Product_A', 'Product_B', 'Product_C']

# Exercise 1: Calculate the total sales for each product.

# Column-wise sum per product, wrapped in a one-column frame named 'Total_Sales' for a clearer display
total_sales = df2[product_cols].sum().to_frame('Total_Sales')

# The result data frame
total_sales


# Exercise 2: Find the date with the highest total sales.

# Calculate total sales per date (sum of all products for each row)
df2['Total_Sales'] = df2[product_cols].sum(axis=1)

# First date that reaches the maximum total sales
max_sales_date = df2.loc[df2['Total_Sales'].idxmax(), 'Date']

# idxmax() only reports the first maximum, so a tie between several dates
# would be hidden; collect every date whose total equals the maximum.
max_sales_dates = df2.loc[df2['Total_Sales'] == df2['Total_Sales'].max(), 'Date']

# Printing the result date(s)
print("Date(s) with highest total sales:", ', '.join(max_sales_dates.dt.strftime('%Y-%m-%d')))

# Exercise 3: Calculate the percentage change in sales for each product from the previous day.

# Daily percentage change for each product (the first row has no previous day, so it is NaN)
percentage_change = df2[product_cols].pct_change() * 100

# Attach the rounded results to a frame without "Total_Sales";
# drop() already returns a new frame, so df2 itself is left untouched
df2_with_pct_change = df2.drop(columns='Total_Sales')
df2_with_pct_change[['Pct_Change_A', 'Pct_Change_B', 'Pct_Change_C']] = percentage_change.round(2)

# The result data frame
df2_with_pct_change

# Exercise 4: Plot a line chart to visualize the sales trends for each product over time.

# One entry per product: (column in df2, legend label, marker, line style)
series_styles = [
    ('Product_A', 'Product A', 'o', '-'),
    ('Product_B', 'Product B', 's', '--'),
    ('Product_C', 'Product C', '^', ':'),
]

# Canvas for the chart
plt.figure(figsize=(10, 6))

# Draw one line per product against the shared date axis
for column, legend_label, marker_shape, line_style in series_styles:
    plt.plot(
        df2['Date'],
        df2[column],
        label=legend_label,
        marker=marker_shape,
        linestyle=line_style,
    )

# Title, axis labels, legend and a light dashed grid
plt.title('Daily Sales Trends by Product', fontsize=14, pad=20)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Sales', fontsize=12)
plt.legend(fontsize=10)
plt.grid(True, linestyle='--', alpha=0.6)

# Tilt the date labels so they do not overlap
plt.xticks(rotation=45)

# Fit everything inside the figure, then render
plt.tight_layout()
plt.show()


#### DataFrame 3: Employee Information

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Source data: one row per employee.
data3 = {
    'Employee_ID': [101, 102, 103, 104, 105, 106, 107, 108, 109, 110],
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Emma', 'Frank', 'Grace', 'Hank', 'Ivy', 'Jack'],
    'Department': ['HR', 'IT', 'Marketing', 'IT', 'Finance', 'HR', 'Marketing', 'IT', 'Finance', 'Marketing'],
    'Salary': [60000, 75000, 65000, 80000, 70000, 72000, 68000, 78000, 69000, 76000],
    'Experience (Years)': [3, 5, 2, 8, 4, 6, 3, 7, 2, 5]
}

df3 = pd.DataFrame(data3)

# Exercise 1: Calculate the average salary for each department.

# Group by 'Department', take the mean of 'Salary', then turn the result into a
# two-column frame (Department, Average_Salary) rounded to 2 decimals
average_salary = df3.groupby('Department')['Salary'].mean().to_frame('Average_Salary').reset_index().round(2)

# The result data frame
average_salary

# Exercise 2: Find the employee with the most experience.

# Selecting with a list of row labels returns a one-row DataFrame directly and
# keeps each column's dtype (a Series transposed via .to_frame().T would turn
# every column into object dtype)
most_experienced = df3.loc[[df3['Experience (Years)'].idxmax()], ['Employee_ID', 'Name', 'Experience (Years)']]

# The result data frame
most_experienced

# Exercise 3: Create a new column 'Salary Increase' representing the percentage increase in salary from the minimum salary in the dataframe.

# Lowest salary in the frame, used as the baseline
min_salary = df3['Salary'].min()

# Percentage by which each salary exceeds the minimum salary, rounded to 2 decimals
df3['Salary increase (%)'] = ((df3['Salary'] - min_salary) / min_salary * 100).round(2)

# The result data frame
df3

# Exercise 4: Plot a bar chart to visualize the distribution of employees across different departments.

# Number of employees in each department, ordered by department name
dept_distribution = df3['Department'].value_counts().sort_index()

# Canvas and bars (one bar per department)
plt.figure(figsize=(8, 5))
bars = plt.bar(dept_distribution.index, dept_distribution.values, color='teal')

# Title and axis labels
plt.title('Employee Distribution by Department', fontsize=14, pad=20)
plt.xlabel('Department', fontsize=12)
plt.ylabel('Number of Employees', fontsize=12)
plt.xticks(rotation=0)  # Department names stay horizontal

# Write each bar's value just above it, centred horizontally on the bar
for rect in bars:
    count = rect.get_height()
    x_center = rect.get_x() + rect.get_width()/2
    plt.text(x_center, count, f'{count}', ha='center', va='bottom', fontsize=11)

# Faint horizontal guide lines, then fit the layout and render
plt.grid(axis='y', linestyle='--', alpha=0.3)
plt.tight_layout()
plt.show()

#### DataFrame 4: Customer Orders

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Source data: one row per customer order.
data4 = {
    'Order_ID': [101, 102, 103, 104, 105, 106, 107, 108, 109, 110],
    'Customer_ID': [201, 202, 203, 204, 205, 206, 207, 208, 209, 210],
    'Product': ['A', 'B', 'A', 'C', 'B', 'C', 'A', 'C', 'B', 'A'],
    'Quantity': [2, 3, 1, 4, 2, 3, 2, 5, 1, 3],
    'Total_Price': [120, 180, 60, 240, 160, 270, 140, 300, 90, 180]
}

df4 = pd.DataFrame(data4)

# Exercise 1: Calculate the total revenue from all orders.

# Sum of the 'Total_Price' column over every order
total_revenue = df4['Total_Price'].sum()

# The result

print(f'The total revenue is $ {total_revenue}.')

# Exercise 2: Find the most ordered product.

# Sum the ordered quantity per product, then take the product with the largest
# sum via idxmax() (this ranks by units ordered, not by number of orders)
top_product = df4.groupby('Product')['Quantity'].sum().idxmax()

# The result

print(f'The most ordered product is {top_product}')

# Exercise 3: Calculate the average quantity of products ordered.

# Mean of the 'Quantity' column, i.e. the average quantity per order
average_qty = df4['Quantity'].mean()

print(f'The average product quantity ordered is {average_qty}')

# Exercise 4: Plot a pie chart to visualize the distribution of sales across different products.

# Revenue per product: sum of 'Total_Price' within each 'Product' group
product_sales = df4.groupby('Product')['Total_Price'].sum()

# Appearance of the wedges
slice_colors = ['skyblue', 'lightgreen', 'salmon']     # one custom color per wedge
slice_border = {'edgecolor': 'white', 'linewidth': 1}  # thin white outline between wedges

# Canvas, then the pie itself
plt.figure(figsize=(8, 6))
plt.pie(
    product_sales,
    labels=product_sales.index,
    autopct='%1.1f%%',   # percentage text on each wedge
    startangle=90,       # rotate the starting angle to 90 degrees
    colors=slice_colors,
    wedgeprops=slice_border,
)

# Title, and equal axis scaling so the pie is drawn as a circle
plt.title('Sales Distribution by Product', pad=20, fontsize=14)
plt.axis('equal')

# Fit the layout and render
plt.tight_layout()
plt.show()
