# Ice Cream Sales — Training Notebook

This notebook trains two classifiers (Decision Tree and SVM) on the provided `Ice_Cream.csv` dataset.
It creates a binary label `HighRevenue` (Revenue >= median), fits models, evaluates them, and saves artifacts into a `models/` folder.
Running the notebook will also write a deployable `app.py` Streamlit file into the project root.

In [None]:
# Imports
import os
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Report the versions of the core data libraries so a run can be reproduced later.
for label, module in (('pandas', pd), ('numpy', np)):
    print(label, module.__version__)


In [None]:
# Load dataset
# Look for the CSV next to this source file when __file__ is defined; if the name
# is undefined (NameError), fall back to the current working directory.
try:
    _source_file = __file__
except NameError:
    base_dir = os.getcwd()
else:
    base_dir = os.path.dirname(_source_file)

csv_path = os.path.join(base_dir, 'Ice_Cream.csv')
df = pd.read_csv(csv_path)
print('Loaded dataset with shape:', df.shape)
# Last expression of the cell: show the first rows as the cell output.
df.head()

In [None]:
# Create binary label using the median revenue
# Rows whose Revenue is at or above the median get HighRevenue = 1, all others 0.
revenue = df['Revenue']
median_revenue = revenue.median()
df['HighRevenue'] = revenue.ge(median_revenue).astype(int)
print('Median revenue threshold:', median_revenue)
# Show how many rows landed in each class.
label_counts = df['HighRevenue'].value_counts()
print(label_counts)


In [None]:
# Prepare features and labels
# Temperature is the only feature; selecting it with a list keeps X two-dimensional.
X = df[['Temperature']].values
y = df['HighRevenue'].values

# Split
# Hold out 20% of the rows for testing, stratified on the label, with a fixed seed.
split = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
X_train, X_test, y_train, y_test = split
print('Train shape:', X_train.shape, 'Test shape:', X_test.shape)


In [None]:
# Train Decision Tree
# Fit on the unscaled training features and score on the held-out test split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
dt_acc = accuracy_score(y_test, y_pred_dt)
print('Decision Tree accuracy:', dt_acc)
# Line breaks are written as \n escapes: a raw newline inside a single-quoted
# string literal is a SyntaxError and would stop the whole cell from compiling.
print('\nDecision Tree classification report:\n', classification_report(y_test, y_pred_dt))

# Train SVM with scaling
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)
svm = SVC(kernel='rbf', probability=True, random_state=42)
svm.fit(X_train_s, y_train)
y_pred_svm = svm.predict(X_test_s)
svm_acc = accuracy_score(y_test, y_pred_svm)
print('SVM accuracy:', svm_acc)
# Line breaks are written as \n escapes: a raw newline inside a single-quoted
# string literal is a SyntaxError and would stop the whole cell from compiling.
print('\nSVM classification report:\n', classification_report(y_test, y_pred_svm))


In [None]:
# Save models and artifacts
# Reuse base_dir from the data-loading cell instead of reading __file__ again:
# that cell already falls back to the working directory when __file__ is
# undefined, whereas a bare __file__ lookup here would raise NameError.
out_dir = os.path.join(base_dir, 'models')
os.makedirs(out_dir, exist_ok=True)

# Fitted estimators plus the scaler the SVM needs at prediction time.
joblib.dump(dt, os.path.join(out_dir, 'decision_tree.joblib'))
joblib.dump(svm, os.path.join(out_dir, 'svm_model.joblib'))
joblib.dump(scaler, os.path.join(out_dir, 'scaler.joblib'))

# Test-set metrics; confusion matrices are converted to plain nested lists.
results = {
    'decision_tree': {'accuracy': dt_acc, 'confusion_matrix': confusion_matrix(y_test, y_pred_dt).tolist()},
    'svm': {'accuracy': svm_acc, 'confusion_matrix': confusion_matrix(y_test, y_pred_svm).tolist()}
}
joblib.dump(results, os.path.join(out_dir, 'results.joblib'))
print('Saved artifacts to', out_dir)
