# In [None]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler

BASE_URL = "http://127.0.0.1:8000"

# 1. UTILITY FUNCTIONS
def fetch_data(endpoint: str, timeout: float = 30.0):
    """
    FETCHES DATA FROM THE GIVEN API ENDPOINT AND RETURNS IT AS JSON.

    PARAMETERS:
        endpoint: PATH OF THE RESOURCE RELATIVE TO BASE_URL (E.G. "users").
                  LEADING/TRAILING SLASHES ARE IGNORED.
        timeout:  SECONDS TO WAIT FOR THE SERVER BEFORE GIVING UP.

    RETURNS:
        THE DECODED JSON BODY OF THE RESPONSE.

    RAISES:
        requests.HTTPError IF THE SERVER ANSWERS WITH A 4XX/5XX STATUS.
        requests.Timeout IF THE SERVER DOES NOT ANSWER WITHIN timeout SECONDS.
    """
    # STRIP SLASHES SO "users", "/users" AND "users/" ALL HIT THE SAME URL.
    url = f"{BASE_URL}/{endpoint.strip('/')}/"
    # WITHOUT AN EXPLICIT TIMEOUT requests WAITS FOREVER ON A STALLED SERVER.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def clean_data(df: pd.DataFrame):
    """
    RETURNS A NEW DATAFRAME CONTAINING ONLY THE ROWS OF df THAT HAVE NO MISSING VALUES.
    THE INPUT DATAFRAME IS LEFT UNTOUCHED.
    """
    # A ROW IS DISCARDED AS SOON AS ANY ONE OF ITS CELLS IS MISSING.
    complete_rows = df.dropna(axis=0, how="any")
    return complete_rows

def replace_null_with_random(df: pd.DataFrame, column: str, source_ids: list):
    """
    REPLACES NULL VALUES IN A SPECIFIED COLUMN WITH RANDOM VALUES FROM THE PROVIDED LIST.

    PARAMETERS:
        df:         DATAFRAME TO FILL. IT IS MODIFIED IN PLACE.
        column:     NAME OF THE COLUMN WHOSE NULL CELLS ARE FILLED.
        source_ids: POOL OF CANDIDATE VALUES (LIST, ARRAY OR SERIES). VALUES ARE
                    DRAWN WITH REPLACEMENT, SO THE SAME VALUE MAY BE USED MORE THAN ONCE.

    RETURNS:
        THE SAME df OBJECT THAT WAS PASSED IN, FOR CONVENIENT REASSIGNMENT.

    RAISES:
        ValueError IF THE COLUMN HAS NULL VALUES BUT source_ids IS EMPTY.
    """
    # COMPUTE THE MASK ONCE AND REUSE IT FOR BOTH COUNTING AND ASSIGNMENT.
    null_mask = df[column].isnull()
    null_count = int(null_mask.sum())
    if null_count == 0:
        return df

    # len() WORKS FOR LISTS, ARRAYS AND SERIES ALIKE (TRUTHINESS DOES NOT).
    if len(source_ids) == 0:
        raise ValueError(
            f"cannot fill {null_count} null value(s) in column {column!r}: "
            "source_ids is empty"
        )

    df.loc[null_mask, column] = np.random.choice(source_ids, size=null_count)
    return df

def normalize_and_standardize(df: pd.DataFrame, numeric_cols: list):
    """
    CREATES A DATAFRAME WITH NORMALIZED AND STANDARDIZED VERSIONS OF NUMERIC FEATURES.

    PARAMETERS:
        df:           SOURCE DATAFRAME. IT IS NOT MODIFIED.
        numeric_cols: NAMES OF THE COLUMNS OF df TO SCALE.

    RETURNS:
        A DATAFRAME SHARING df'S INDEX. FOR EVERY COLUMN c IN numeric_cols IT HOLDS
        "c_normalized" (MinMaxScaler OUTPUT) FOLLOWED BY "c_standardized"
        (StandardScaler OUTPUT). IF numeric_cols IS EMPTY THE RESULT HAS NO COLUMNS.
    """
    # NOTHING TO SCALE: THE SCALERS REJECT INPUT WITH ZERO FEATURES, SO RETURN
    # AN EMPTY FRAME THAT STILL LINES UP WITH df.
    if len(numeric_cols) == 0:
        return pd.DataFrame(index=df.index)

    numeric = df[numeric_cols]

    # REUSE df'S INDEX SO A LATER COLUMN-WISE CONCAT WITH df ALIGNS ROW FOR ROW,
    # EVEN WHEN df DOES NOT HAVE A DEFAULT 0..N-1 INDEX.
    normalized_df = pd.DataFrame(
        MinMaxScaler().fit_transform(numeric),
        columns=[f"{col}_normalized" for col in numeric_cols],
        index=df.index,
    )
    standardized_df = pd.DataFrame(
        StandardScaler().fit_transform(numeric),
        columns=[f"{col}_standardized" for col in numeric_cols],
        index=df.index,
    )

    return pd.concat([normalized_df, standardized_df], axis=1)


# 2. FETCH AND PREPARE DATA
# DOWNLOAD THE RAW JSON PAYLOADS OF THE "medical_item" AND "users" ENDPOINTS.
medical_item = fetch_data("medical_item")
users = fetch_data("users")

# NOTE(REVIEW): ASSUMES EACH ENDPOINT RETURNS A LIST OF RECORDS THAT
# pd.DataFrame CAN TURN INTO ROWS -- CONFIRM AGAINST THE API.
medical_item_df = pd.DataFrame(medical_item)
users_df = pd.DataFrame(users)

# 3. FIND AND REPLACE NULL VALUES
# IF THE USERS PAYLOAD HAS NO 'medical_item_id' COLUMN AT ALL, CREATE IT FILLED
# WITH NaN SO THAT EVERY USER RECEIVES A RANDOM ID IN THE NEXT STEP.
if 'medical_item_id' not in users_df.columns:
    users_df['medical_item_id'] = np.nan

# FILL MISSING 'medical_item_id' VALUES WITH IDS DRAWN AT RANDOM FROM medical_item_df['id'].
users_df = replace_null_with_random(users_df, 'medical_item_id', medical_item_df['id'])

# 4. PERFORM DATA CLEANING
# DROPS EVERY USER ROW THAT STILL HAS A MISSING VALUE IN ANY COLUMN.
users_df_cleaned = clean_data(users_df)

# 5. JOIN/MERGE DATAFRAMES
# LEFT JOIN: EVERY MEDICAL ITEM IS KEPT; ITEMS WITHOUT A MATCHING USER GET NaN
# IN THE USER COLUMNS.
# NOTE(REVIEW): AN ITEM REFERENCED BY SEVERAL USERS APPEARS ONCE PER USER, SO
# merged_df CAN HAVE MORE ROWS THAN medical_item_df.
merged_df = pd.merge(medical_item_df, users_df_cleaned, left_on='id', right_on='medical_item_id', how='left')

# 6. CREATE DATAFRAME WITH NUMERIC FEATURES
# IDENTIFY NUMERIC COLUMNS
numeric_cols = merged_df.select_dtypes(include=[np.number]).columns.tolist()

# NORMALIZE AND STANDARDIZE NUMERIC FEATURES
numeric_features_df = normalize_and_standardize(merged_df, numeric_cols)

# COMBINE ORIGINAL AND PROCESSED NUMERIC FEATURES
# COLUMN-WISE CONCAT: ROWS OF THE TWO FRAMES ARE MATCHED ON THEIR INDEX.
final_df = pd.concat([merged_df, numeric_features_df], axis=1)

# 7. EXPORT DATA
# THE CSV EXPORT BELOW IS CURRENTLY DISABLED.
# final_df.to_csv("merged_data_with_features.csv", index=False)
# print("EXPORTED MERGED DATAFRAME WITH NUMERIC FEATURES TO 'MERGED_DATA_WITH_FEATURES.CSV'.")

# PRINT SHAPES TO CONFIRM ALL ROWS IN medical_item_df ARE RETAINED
# ONLY THE ORIGINAL SHAPE IS PRINTED; THE MERGED SHAPE PRINT IS DISABLED, SO
# THE COMPARISON HAS TO BE DONE BY HAND.
print("ORIGINAL medical_item_df SHAPE:", medical_item_df.shape)
# print("MERGED DATAFRAME SHAPE:", merged_df.shape)
