In [4]:

import pandas as pd
import numpy as np

def exercise_1_create_df():
    """Build a reproducible 10-row demo DataFrame, print its head, return it.

    The global NumPy RNG is seeded with 0 before any values are drawn, so the
    ``age``, ``score`` and ``city`` columns are identical on every call.

    Returns:
        pandas.DataFrame: columns ``id``, ``name``, ``age``, ``score``, ``city``.
    """
    n_rows = 10
    np.random.seed(0)

    # Draw order (age -> score -> city) determines the generated values.
    ages = np.random.randint(18, 60, size=n_rows)
    scores = np.random.randint(0, 101, size=n_rows)
    cities = np.random.choice(["NY", "LA", "SF"], size=n_rows)

    ids = range(1, n_rows + 1)
    frame = pd.DataFrame({
        "id": ids,
        "name": ["Person_" + str(i) for i in ids],
        "age": ages,
        "score": scores,
        "city": cities,
    })

    separator = "-" * 60
    print("Exercise 1 - DataFrame (first 5 rows):")
    print(frame.head())
    print(separator)
    return frame

def exercise_2_filter_numeric_gt_50(df):
    """Print and return the rows of ``df`` whose ``score`` exceeds 50.

    Args:
        df: DataFrame containing a ``score`` column.

    Returns:
        pandas.DataFrame: the matching rows, with their original index labels.
    """
    above_fifty = df["score"].gt(50)
    high_scores = df.loc[above_fifty]

    print("Exercise 2 - Filter rows where score > 50:")
    print(high_scores)
    print("-" * 60)
    return high_scores

def exercise_3_add_new_column(df):
    """Return a copy of ``df`` with an ``age_score`` column (age * score).

    The input frame is left untouched; the first five rows of the new frame
    are printed.

    Args:
        df: DataFrame containing ``age`` and ``score`` columns.

    Returns:
        pandas.DataFrame: a new frame with ``age_score`` appended.
    """
    # ``assign`` builds a new frame, so the caller's DataFrame is not mutated.
    enriched = df.assign(age_score=df["age"].mul(df["score"]))

    print("Exercise 3 - Add new column 'age_score' = age * score:")
    print(enriched.head())
    print("-" * 60)
    return enriched

def exercise_4_drop_dupes_fill_missing():
    """Demonstrate de-duplication followed by mean imputation.

    Builds a small frame containing one fully duplicated row and some NaNs,
    drops the duplicate, then replaces each remaining NaN in a numeric column
    with that column's mean (computed after de-duplication).

    Returns:
        pandas.DataFrame: the cleaned frame; index labels of the surviving
        rows are preserved (not reset).
    """
    raw = pd.DataFrame({
        "id": [1, 2, 2, 3, 4, 5],
        "value": [10, np.nan, np.nan, 40, 50, 50],
        "score": [60, 70, 70, np.nan, 90, 90]
    })
    print("Exercise 4 - Original with duplicates + missing:")
    print(raw)

    deduped = raw.drop_duplicates()

    # Per-column means are taken from the de-duplicated data, before filling.
    column_means = {
        col: deduped[col].mean()
        for col in deduped.select_dtypes(include="number").columns
    }
    cleaned = deduped.assign(**{
        col: deduped[col].fillna(avg) for col, avg in column_means.items()
    })

    print("\nExercise 4 - After drop_duplicates + fillna(mean):")
    print(cleaned)
    print("-" * 60)
    return cleaned

def exercise_5_merge_two_dataframes():
    """Demonstrate an inner join of two small frames on their ``id`` column.

    Prints both inputs and the joined result.

    Returns:
        pandas.DataFrame: rows whose ``id`` appears in both inputs, with
        columns ``id``, ``name`` and ``department``.
    """
    people = pd.DataFrame({
        "id": [1, 2, 3, 4],
        "name": ["Ava", "Ben", "Cara", "Dan"]
    })
    departments = pd.DataFrame({
        "id": [2, 3, 4, 5],
        "department": ["Sales", "Eng", "HR", "Legal"]
    })

    # Inner join: only ids present on both sides survive.
    joined = pd.merge(people, departments, how="inner", on="id")

    print("Exercise 5 - Merge on 'id' (inner join):")
    for label, frame in (
        ("Left:\n", people),
        ("Right:\n", departments),
        ("Merged:\n", joined),
    ):
        print(label, frame)
    print("-" * 60)
    return joined

def exercise_6_time_series_resample_monthly(seed=None):
    """Build a 90-day daily series and down-sample it to monthly means.

    The daily series starts on 2025-01-01 and holds random integers in
    ``[0, 100)``. It is resampled with the ``"MS"`` (month-start) rule and
    averaged per month.

    Args:
        seed: Optional seed for reproducible values. When given, the values
            are drawn from a private ``np.random.RandomState(seed)`` so the
            result does not depend on, and does not disturb, the global NumPy
            RNG. When ``None`` (the default) the global RNG is used, which is
            the original behaviour.

    Returns:
        tuple: ``(ts, monthly)`` where ``ts`` is the daily DataFrame indexed
        by ``date`` and ``monthly`` is the per-month mean of ``value``.
    """
    # Without a seed, fall back to the module-level RNG so existing callers
    # that rely on a previously seeded global state see identical output.
    rng = np.random if seed is None else np.random.RandomState(seed)

    dates = pd.date_range("2025-01-01", periods=90, freq="D")
    ts = pd.DataFrame({
        "date": dates,
        "value": rng.randint(0, 100, size=len(dates))
    }).set_index("date")

    monthly = ts.resample("MS").mean()
    print("Exercise 6 - Time series (daily) head:")
    print(ts.head(5))
    print("\nExercise 6 - Resampled monthly (mean):")
    print(monthly)
    print("-" * 60)
    return ts, monthly

def main():
    """Run exercises 1 through 6 in order, discarding their return values.

    The frame produced by exercise 1 is fed to exercises 2 and 3; exercises
    4-6 build their own data.
    """
    people = exercise_1_create_df()

    # Exercises that operate on the frame from exercise 1.
    for step in (exercise_2_filter_numeric_gt_50, exercise_3_add_new_column):
        step(people)

    # Self-contained exercises.
    for standalone in (
        exercise_4_drop_dupes_fill_missing,
        exercise_5_merge_two_dataframes,
        exercise_6_time_series_resample_monthly,
    ):
        standalone()

# Run the exercise sequence only when executed as a script, not on import.
if __name__ == "__main__":
    main()


Exercise 1 - DataFrame (first 5 rows):
   id      name  age  score city
0   1  Person_1   18     88   LA
1   2  Person_2   21     88   LA
2   3  Person_3   21     12   LA
3   4  Person_4   57     58   NY
4   5  Person_5   27     65   LA
------------------------------------------------------------
Exercise 2 - Filter rows where score > 50:
   id       name  age  score city
0   1   Person_1   18     88   LA
1   2   Person_2   21     88   LA
3   4   Person_4   57     58   NY
4   5   Person_5   27     65   LA
6   7   Person_7   39     87   NY
8   9   Person_9   41     88   SF
9  10  Person_10   24     81   NY
------------------------------------------------------------
Exercise 3 - Add new column 'age_score' = age * score:
   id      name  age  score city  age_score
0   1  Person_1   18     88   LA       1584
1   2  Person_2   21     88   LA       1848
2   3  Person_3   21     12   LA        252
3   4  Person_4   57     58   NY       3306
4   5  Person_5   27     65   LA       1755
-------