In [1]:
# ============================================================
# TASK 2 : FINANCIAL DATA PREPROCESSING (GOOGLE COLAB)
# ============================================================

import pandas as pd
import numpy as np
from google.colab import files

print("\n================== TASK 2: FINANCIAL DATA ==================\n")

# ---------------- UPLOAD DATASET ----------------
print("Upload your financial CSV file")
uploaded = files.upload()

# Fail with a clear message instead of a bare IndexError when the upload
# result is empty (e.g. the upload dialog was dismissed).
if not uploaded:
    raise ValueError("No file uploaded: please select a financial CSV file.")

# Only the first uploaded file is processed.
file_name = next(iter(uploaded))
financial_df = pd.read_csv(file_name)

# Everything below relies on these two columns; report all missing ones at
# once rather than failing halfway through the pipeline.
missing_columns = [
    column
    for column in ("closing_price", "volume")
    if column not in financial_df.columns
]
if missing_columns:
    raise KeyError(f"Uploaded CSV is missing required column(s): {missing_columns}")

# ---------------- BEFORE SUMMARY ----------------
print("\n--- BEFORE ---")
print(financial_df.head())

# ---------------- HANDLE MISSING VALUES ----------------
# Series.ffill() replaces fillna(method="ffill"), which is deprecated and
# emits a FutureWarning on recent pandas releases.
# Forward fill cannot fill NaNs that precede the first valid price; those
# rows keep NaN here and are removed by the outlier filter further down.
financial_df["closing_price"] = financial_df["closing_price"].ffill()
financial_df["volume"] = financial_df["volume"].fillna(financial_df["volume"].median())

# ---------------- CREATE LAG FEATURES ----------------
# Percentage change of the closing price versus 1 and 7 rows earlier.
# NOTE(review): rows are used in file order (no sort on a date column), and
# returns are computed before outlier rows are removed — confirm both are
# intended.
financial_df["return_1d"] = financial_df["closing_price"].pct_change()
financial_df["return_7d"] = financial_df["closing_price"].pct_change(periods=7)

# ---------------- LOG NORMALIZE VOLUME ----------------
# log1p(x) = log(1 + x), so a volume of 0 maps to 0 instead of -inf.
financial_df["volume_log"] = np.log1p(financial_df["volume"])

# ---------------- IQR OUTLIER DETECTION ----------------
Q1 = financial_df["closing_price"].quantile(0.25)
Q3 = financial_df["closing_price"].quantile(0.75)
IQR = Q3 - Q1

lower = Q1 - 1.5 * IQR
upper = Q3 + 1.5 * IQR

# Keep rows whose closing price lies within [lower, upper] (both bounds
# inclusive). NaN prices never satisfy the comparison, so rows that still
# have a missing closing price are dropped here as well.
financial_df = financial_df[financial_df["closing_price"].between(lower, upper)]

# ---------------- AFTER SUMMARY ----------------
print("\n--- AFTER ---")
print(financial_df.head())

# ---------------- SAVE CLEANED FILE ----------------
financial_df.to_csv("cleaned_financial_data.csv", index=False)
files.download("cleaned_financial_data.csv")

# ---------------- TEST CASES ----------------
# Sanity checks on the cleaned frame: volume fully imputed, the 1-day return
# feature exists, and the log-scaled volume is non-negative.
assert financial_df["volume"].isna().sum() == 0
assert "return_1d" in financial_df.columns
assert financial_df["volume_log"].min() >= 0

print("\n Task 2 Passed All Tests")




Upload your financial CSV file


Saving financial_data (1).csv to financial_data (1) (1).csv

--- BEFORE ---
         date  closing_price  volume
0  2025-01-01            NaN  5000.0
1  2025-01-02         131.04  2000.0
2  2025-01-03         138.26  2000.0
3  2025-01-04         164.68     NaN
4  2025-01-05         165.06  5000.0

--- AFTER ---
         date  closing_price  volume  return_1d  return_7d  volume_log
1  2025-01-02         131.04  2000.0        NaN        NaN    7.601402
2  2025-01-03         138.26  2000.0   0.055098        NaN    7.601402
3  2025-01-04         164.68  2000.0   0.191089        NaN    7.601402
4  2025-01-05         165.06  5000.0   0.002308        NaN    8.517393
5  2025-01-06         137.99  1500.0  -0.164001        NaN    7.313887


  financial_df["closing_price"] = financial_df["closing_price"].fillna(method="ffill")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Task 2 Passed All Tests
