In [1]:
import pandas as pd
import numpy as np

# Exercise banner: three lines emitted by a single print call, one per line.
print(
    "--- Intermediate Pandas Exercise ---",
    "Complete each task by writing the requested Pandas code.",
    "------------------------------------",
    sep="\n",
)

# Task 1: Basic Pandas Functions and Converting Arrays to DataFrames
print("\n--- Task 1: Basic Pandas Functions and Converting Arrays to DataFrames ---")

# NOTE on the print calls below: sep="" stops print() from inserting its
# default space after the trailing "\n" of the label. With the default
# separator the first line of the repr is shifted one column to the right,
# so the header/first row no longer lines up with the remaining rows.

# 1.1 Create a Pandas Series from a plain Python list
s1_1 = pd.Series([10, 20, 30, 40, 50])
print("Series s1_1:\n", s1_1, sep="")

# 1.2 Create a 3x4 NumPy array with random integers
# randint's upper bound is exclusive, so values fall in 1..100 inclusive.
np_array1_2 = np.random.randint(1, 101, size=(3, 4))
print("\nNumPy array np_array1_2:\n", np_array1_2, sep="")

# 1.3 Convert to DataFrame, labelling the four array columns A-D
df1_3 = pd.DataFrame(np_array1_2, columns=['A', 'B', 'C', 'D'])
print("\nDataFrame df1_3:\n", df1_3, sep="")

# Task 2: Synthetic Data Generation for Practice
print("\n--- Task 2: Synthetic Data Generation for Practice ---")

# One row per calendar day; date_range includes both endpoints.
dates = pd.date_range(start='2023-01-01', end='2023-04-09')
n_rows = len(dates)
df_sales = pd.DataFrame({
    'Date': dates,
    'Region': np.random.choice(['East', 'West', 'North', 'South'], size=n_rows),
    'Product': np.random.choice(['Laptop', 'Mouse', 'Keyboard', 'Monitor'], size=n_rows),
    # Both numeric columns are created as float up front because NaN is
    # assigned into them below. Assigning NaN into an int64 column relies on
    # an implicit int -> float upcast, which recent pandas versions deprecate
    # (FutureWarning: "Setting an item of incompatible dtype ...").
    # randint's upper bound is exclusive: Sales is 100..1000, Quantity 1..10.
    'Sales': np.random.randint(100, 1001, size=n_rows).astype(float),
    'Quantity': np.random.randint(1, 11, size=n_rows).astype(float),
})

# Introduce 5 NaNs in 'Sales' and 3 NaNs in 'Quantity'
# replace=False guarantees distinct row labels, so the NaN counts are exact.
# The two samples are drawn independently and may overlap on a row.
sales_nan_indices = np.random.choice(df_sales.index, size=5, replace=False)
quantity_nan_indices = np.random.choice(df_sales.index, size=3, replace=False)
df_sales.loc[sales_nan_indices, 'Sales'] = np.nan
df_sales.loc[quantity_nan_indices, 'Quantity'] = np.nan

# sep="" avoids the stray space print() would otherwise insert after "\n",
# which misaligns the column header against the rows.
print("First 5 rows of df_sales:\n", df_sales.head(), sep="")

# Task 3: Indexing and Slicing in DataFrames
print("\n--- Task 3: Indexing and Slicing in DataFrames ---")

# 3.1 Column subset: keep only 'Region' and 'Sales'
df3_1 = df_sales[['Region', 'Sales']]

# 3.2 Boolean mask: rows whose product is 'Laptop'
df3_2 = df_sales[df_sales['Product'] == 'Laptop']

# 3.3 Combined mask: 'East' rows with Sales above 500.
# Rows whose Sales is NaN compare as False and are therefore excluded.
df3_3 = df_sales[(df_sales['Region'] == 'East') & (df_sales['Sales'] > 500)]

# 3.4 Sales value of the first 'Monitor' row.
# The data is random, so there may be no Monitor row at all; .iloc[0] on an
# empty selection raises IndexError, so fall back to NaN in that case.
monitor_sales = df_sales.loc[df_sales['Product'] == 'Monitor', 'Sales']
val3_4 = monitor_sales.iloc[0] if not monitor_sales.empty else np.nan

# sep="" avoids the stray space print() would otherwise insert after "\n",
# which misaligns the column header against the rows.
print("df3_1:\n", df3_1.head(), sep="")
print("\ndf3_2 (Laptops):\n", df3_2.head(), sep="")
print("\ndf3_3 (East & Sales > 500):\n", df3_3.head(), sep="")
print("\nval3_4 (First Monitor Sales):", val3_4)

# Task 4: Data Cleaning (Handling Missing Values)
print("\n--- Task 4: Data Cleaning (Handling Missing Values) ---")

# 4.1 Number of missing values per column
missing_counts4_1 = df_sales.isna().sum()

# 4.2 Fill missing 'Sales' with the column mean, working on a copy so that
# df_sales itself is left untouched. Only 'Sales' is filled; any NaN in
# 'Quantity' remains in df4_2_filled.
df4_2_filled = df_sales.copy()
df4_2_filled['Sales'] = df4_2_filled['Sales'].fillna(df4_2_filled['Sales'].mean())

# 4.3 Drop every row that has a NaN in any column (returns a new DataFrame)
df4_3_dropped = df_sales.dropna()

# sep="" avoids the stray space print() would otherwise insert after "\n",
# which misaligns the first line of each repr against the following lines.
print("Missing counts:\n", missing_counts4_1, sep="")
print("\ndf4_2_filled (Sales filled with mean):\n", df4_2_filled.head(), sep="")
print("\ndf4_3_dropped (Rows with NaN dropped):\n", df4_3_dropped.head(), sep="")

# Task 5: Data Manipulation (Columns, Sorting, Grouping)
print("\n--- Task 5: Data Manipulation (Columns, Sorting, Grouping) ---")

# 5.1 Total Revenue column
# Added to df_sales in place. A NaN in either 'Sales' or 'Quantity'
# propagates to NaN in 'Total_Revenue' for that row.
df_sales['Total_Revenue'] = df_sales['Sales'] * df_sales['Quantity']

# 5.2 Sort by 'Date' ascending, then 'Sales' descending
# 'Sales' only acts as a tie-breaker among rows sharing the same 'Date'.
df5_2_sorted = df_sales.sort_values(by=['Date', 'Sales'], ascending=[True, False])

# 5.3 Group by 'Region' and sum Sales/Quantity
df5_3_grouped_region = df_sales.groupby('Region')[['Sales', 'Quantity']].sum()

# 5.4 Group by 'Product' and average Sales
df5_4_grouped_product = df_sales.groupby('Product')['Sales'].mean()

# sep="" avoids the stray space print() would otherwise insert after "\n",
# which misaligns the first line of each repr against the following lines.
print("df_sales with Total_Revenue:\n", df_sales.head(), sep="")
print("\ndf5_2_sorted:\n", df5_2_sorted.head(), sep="")
print("\ndf5_3_grouped_region:\n", df5_3_grouped_region, sep="")
print("\ndf5_4_grouped_product:\n", df5_4_grouped_product, sep="")


--- Intermediate Pandas Exercise ---
Complete each task by writing the requested Pandas code.
------------------------------------

--- Task 1: Basic Pandas Functions and Converting Arrays to DataFrames ---
Series s1_1:
 0    10
1    20
2    30
3    40
4    50
dtype: int64

NumPy array np_array1_2:
 [[ 56   4 100  97]
 [ 60  69  68  58]
 [ 25   7  63  21]]

DataFrame df1_3:
     A   B    C   D
0  56   4  100  97
1  60  69   68  58
2  25   7   63  21

--- Task 2: Synthetic Data Generation for Practice ---
First 5 rows of df_sales:
         Date Region   Product  Sales  Quantity
0 2023-01-01  North     Mouse  465.0       5.0
1 2023-01-02   West     Mouse  751.0       9.0
2 2023-01-03  North  Keyboard  674.0       6.0
3 2023-01-04   West   Monitor  645.0       4.0
4 2023-01-05   East    Laptop  683.0       7.0

--- Task 3: Indexing and Slicing in DataFrames ---
df3_1:
   Region  Sales
0  North  465.0
1   West  751.0
2  North  674.0
3   West  645.0
4   East  683.0

df3_2 (Laptops):
       