# Deep Learning Exploration Notebook

This notebook is for exploring database files and training a neural network that learns feature-importance weights.

## Contents
1. Setup and Imports
2. Data Loading and Exploration
3. Data Preprocessing
4. Model Definition and Training
5. Evaluation and Feature Weight Analysis

## 1. Setup and Imports

In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from pathlib import Path

# Custom modules
from data_loader import DataProcessor, get_data_loaders
from model import WeightAssignmentNetwork, create_model
from utils import set_seed, get_device

# Seed handed to set_seed() so repeated runs are reproducible; named so it can
# be changed in one place instead of being a bare literal in the call.
SEED = 42
set_seed(SEED)

# Resolve the compute device once; the model and training cells further down
# move the model and each batch onto `device`.
device = get_device()
print(f"Using device: {device}")

## 2. Data Loading and Exploration

Upload your database files to the `data/` directory (referenced from this notebook as `../data/`), then set `data_path` in the cell below and uncomment the loading lines.

In [None]:
# Processor instance shared by the loading, preprocessing and plotting cells below
processor = DataProcessor()

# Location of the input file, relative to this notebook - point it at your own data
data_path = '../data/your_data_file.csv'  # Change this to your file

# Once the file is in place, uncomment to load it and preview the first rows:
# df = processor.load_data(data_path)
# print(f"Data shape: {df.shape}")
# df.head()

In [None]:
# Data exploration - uncomment when data is loaded
# # Needs `df` from the loading cell above (its load call is also commented out by default).
# # df.info() prints its summary directly; df.describe() is the last expression in the
# # cell, so the notebook renders it as the cell output.
# print("Data Info:")
# df.info()
# print("\nStatistics:")
# df.describe()

In [None]:
# Visualize data distribution - uncomment when data is loaded
# # Draws a 30-bin histogram for each of the first four numeric columns of `df`
# # on a 2x2 grid of axes.
# # zip() stops at the shorter sequence, so with fewer than four numeric columns
# # the remaining axes are left empty.
# fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# 
# # Plot distributions of numeric columns
# numeric_cols = df.select_dtypes(include=[np.number]).columns[:4]
# for ax, col in zip(axes.flatten(), numeric_cols):
#     df[col].hist(ax=ax, bins=30)
#     ax.set_title(f'Distribution of {col}')
# 
# plt.tight_layout()
# plt.show()

## 3. Data Preprocessing

In [None]:
# Preprocess data - uncomment when data is loaded
# # Needs `df` from the loading cell and the `processor` created there.
# # `features` is used later to size the model input (features.shape[1]).
# # `targets` may be None: the print below guards for that case.
# # NOTE(review): what preprocess() actually does (scaling, encoding, ...) is not
# # visible here - check DataProcessor in data_loader.
# target_column = 'your_target_column'  # Change this to your target column name
# 
# features, targets = processor.preprocess(df, target_column=target_column)
# print(f"Features shape: {features.shape}")
# print(f"Targets shape: {targets.shape if targets is not None else 'None'}")

In [None]:
# Create data loaders - uncomment when data is loaded
# # Needs `data_path` from the loading cell and `target_column` from the preprocessing cell.
# # The third return value is discarded.
# # NOTE(review): this call is handed data_path, not the features/targets produced by
# # the preprocessing cell, so it presumably loads and preprocesses the file itself -
# # confirm in data_loader that both paths apply the same preprocessing.
# train_loader, val_loader, _ = get_data_loaders(
#     data_path,
#     target_column=target_column,
#     batch_size=32,
#     train_split=0.8
# )
# 
# print(f"Training batches: {len(train_loader)}")
# print(f"Validation batches: {len(val_loader)}")

## 4. Model Definition and Training

In [None]:
# Create model - uncomment and adjust when data is ready
# # Needs `features` from the preprocessing cell and `device` from the setup cell.
# # The input width is taken from the second dimension of `features`; the model is
# # built with a single output and then moved onto `device`.
# input_dim = features.shape[1]
# 
# model = create_model(
#     model_type='weight_assignment',
#     input_dim=input_dim,
#     output_dim=1
# )
# model = model.to(device)
# print(model)

In [None]:
# Training loop - uncomment when ready to train
# # Needs `model`, `device`, `train_loader` and `val_loader` from the cells above.
# # Each epoch runs one optimisation pass over train_loader and one gradient-free
# # pass over val_loader, then stores the mean per-batch MSE of each pass in
# # train_losses / val_losses (read by the plotting cell further down).
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# 
# epochs = 50
# train_losses = []
# val_losses = []
# 
# for epoch in range(epochs):
#     # Training
#     model.train()
#     epoch_loss = 0
#     for inputs, targets in train_loader:
#         inputs, targets = inputs.to(device), targets.to(device)
#         
#         optimizer.zero_grad()
#         outputs = model(inputs)
#         # NOTE(review): a bare .squeeze() removes every size-1 dimension, so a batch
#         # holding a single sample also loses its batch dimension; .squeeze(-1) would be
#         # safer if outputs are (batch, 1) - TODO confirm the model's output shape.
#         loss = criterion(outputs.squeeze(), targets)
#         loss.backward()
#         optimizer.step()
#         epoch_loss += loss.item()
#     
#     # Mean of the per-batch losses; raises ZeroDivisionError if len(train_loader) is 0.
#     train_losses.append(epoch_loss / len(train_loader))
#     
#     # Validation
#     model.eval()
#     val_loss = 0
#     with torch.no_grad():
#         for inputs, targets in val_loader:
#             inputs, targets = inputs.to(device), targets.to(device)
#             outputs = model(inputs)
#             # Same .squeeze() caveat as in the training pass above.
#             val_loss += criterion(outputs.squeeze(), targets).item()
#     # Mean of the per-batch losses; raises ZeroDivisionError if len(val_loader) is 0.
#     val_losses.append(val_loss / len(val_loader))
#     
#     # Report progress every 10th epoch only.
#     if (epoch + 1) % 10 == 0:
#         print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_losses[-1]:.4f} - Val Loss: {val_losses[-1]:.4f}")

## 5. Evaluation and Feature Weight Analysis

In [None]:
# Plot training history - uncomment after training
# # Needs `train_losses` and `val_losses` filled by the training loop cell; each list
# # holds one value per epoch, so the x axis is the zero-based epoch index.
# plt.figure(figsize=(10, 5))
# plt.plot(train_losses, label='Training Loss')
# plt.plot(val_losses, label='Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.title('Training History')
# plt.legend()
# plt.show()

In [None]:
# Visualize learned feature weights - uncomment after training
# # Needs the trained `model` and the `processor` from the loading cell.
# # Draws one bar per entry of the array returned by model.get_feature_weights().
# # NOTE(review): Tensor.numpy() raises a RuntimeError on a tensor that still requires
# # grad; if get_feature_weights() does not detach its result, insert .detach() before
# # .cpu() - TODO confirm in model.py.
# # NOTE(review): plt.bar below presumably expects a 1-D array with one weight per
# # feature - verify the shape get_feature_weights() returns.
# feature_weights = model.get_feature_weights().cpu().numpy()
# 
# plt.figure(figsize=(12, 6))
# plt.bar(range(len(feature_weights)), feature_weights)
# plt.xlabel('Feature Index')
# plt.ylabel('Weight')
# plt.title('Learned Feature Importance Weights')
# 
# # Add feature names if available
# # (skipped when processor.feature_columns is empty or None)
# if processor.feature_columns:
#     plt.xticks(range(len(feature_weights)), processor.feature_columns, rotation=45, ha='right')
# 
# plt.tight_layout()
# plt.show()

In [None]:
# Save model - uncomment after training
# # Needs the trained `model` from the training cells.
# # NOTE(review): this import sits outside the notebook's import cell; consider moving
# # it next to the other `from utils import ...` line at the top when enabling this cell.
# # NOTE(review): the two string arguments look like a target directory and a file
# # name - confirm against save_model in utils.
# from utils import save_model
# save_model(model, '../models', 'trained_model.pt')
# print("Model saved!")