# Retail Price Optimization Analysis

This notebook demonstrates how to interact with the project codebase to fetch data, perform analysis, and train the model interactively.

In [None]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Add parent directory to path to import modules
sys.path.append(os.path.abspath(os.path.join('..')))

from data.managament.retreiver import get_latest_data
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

## 1. Data Retrieval
Fetch the latest data directly from the PostgreSQL database using the project's retriever (`get_latest_data`).

In [None]:
# Load the dataset through the project's retriever and bind it to `df`,
# the frame that every later cell in this notebook reads.
# NOTE(review): presumably returns a pandas DataFrame (the code below uses
# .shape, .head(), .empty and .copy()) — confirm against the retriever module.
df = get_latest_data()
# Quick sanity check on the size of what came back.
print(f"Data shape: {df.shape}")
# Last expression of the cell, so the preview is rendered as the cell output.
df.head()

## 2. Exploratory Data Analysis (EDA)

In [None]:
# Visualise how the `unit_price` column is distributed. When the retrieved
# frame has no rows there is nothing to draw, so report that instead.
if df.empty:
    print("No data to plot.")
else:
    # 30-bin histogram with a kernel-density overlay on a 10x6 inch figure.
    plt.figure(figsize=(10, 6))
    sns.histplot(df["unit_price"], bins=30, kde=True)
    plt.title("Distribution of Unit Prices")
    plt.show()

## 3. Data Processing

In [None]:
# Minimal preprocessing, similar in spirit to process_data.py: integer-encode
# the identifier/category columns and zero-fill whatever is still missing.
# The raw `df` is left untouched; the result is bound to `df_processed`.
if not df.empty:
    df_processed = df.copy()

    # Categorical encoding (example): only columns actually present are encoded.
    # Each one is replaced by its pandas category codes.
    encoded_columns = {
        name: df_processed[name].astype("category").cat.codes
        for name in ("product_id", "product_category_name")
        if name in df_processed.columns
    }

    # Validation step: swap in the encoded columns, then replace remaining NaNs with 0.
    df_processed = df_processed.assign(**encoded_columns).fillna(0)
    print("Data processed.")

## 4. Model Training

In [None]:
# Fit a baseline linear regression that predicts the `qty` column from the
# remaining processed columns, then report hold-out error metrics.
# `df.empty` is tested first on purpose: the processing cell only defines
# `df_processed` when data exists, and the short-circuiting `and` keeps this
# cell from touching that name on an empty frame.
if not df.empty and 'qty' in df_processed.columns:
    # Features: everything except the target; 'month_year' is dropped too when
    # it is present (errors='ignore' makes a missing column a no-op).
    X = df_processed.drop(columns=['qty', 'month_year'], errors='ignore')
    y = df_processed['qty']

    # 80/20 train/test split with a fixed seed so the metrics are reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate on the held-out 20%.
    preds = model.predict(X_test)
    print(f"RMSE: {np.sqrt(mean_squared_error(y_test, preds))}")
    print(f"R2 Score: {r2_score(y_test, preds)}")
else:
    # Mirror the EDA cell: say why nothing happened instead of leaving the
    # cell silently without output.
    print("Skipping model training: no data available or 'qty' column is missing.")