#### IMPORT LIBRARIES

In [None]:
import os
import pickle
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Hide FutureWarning messages so library deprecation notices do not clutter cell output.
warnings.filterwarnings("ignore", category=FutureWarning)

# None removes the column limit, so wide DataFrames are displayed without truncation.
pd.options.display.max_columns = None

#### LOAD DATA

In [None]:
# Load the dataset; the relative path is resolved against the current working directory.
df = pd.read_csv('data/pokemon.csv')

# Preview the first rows to sanity-check that the file was parsed as expected.
df.head()

#### BASIC EDA

In [None]:
# Print column names, dtypes, and non-null counts (a quick check for missing values).
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

#### VISUALIZATIONS

In [None]:
## COUNT PLOT - LEGENDARY/NON-LEGENDARY
# One bar per value of `is_legendary`, showing how many rows fall in each class.

plt.figure(figsize=(6,4))
# NOTE(review): recent seaborn releases deprecate `palette` without `hue`
# (the warning is a FutureWarning, which this notebook suppresses). If the
# installed version supports it, pass hue="is_legendary", legend=False instead.
sns.countplot(x="is_legendary", data=df, palette="viridis")

# Title previously contained mojibake ("Pok√©mon"); restored the intended "é".
plt.title("Distribution of Legendary vs Non-Legendary Pokémon")
plt.xlabel("Is Legendary (0 = No, 1 = Yes)")
plt.ylabel("Count")

plt.show()

In [None]:
## SCATTER PLOT: Height vs Weight (bubble by Speed)
# One marker per row: position from height/weight, colour from `type`,
# marker area scaled by `speed` between 20 and 200.

plt.figure(figsize=(8, 6))

# scatterplot() draws on the current axes and returns them, so the title can
# be set directly on the returned Axes.
sns.scatterplot(
    x='height_m',
    y='weight_kg',
    hue='type',
    size='speed',
    sizes=(20, 200),
    data=df,
).set_title('Height vs Weight (bubble by Speed)')

plt.show()

In [None]:
## RADAR CHART

# Stat columns used as the radar-chart axes by plot_radar and as the model
# features in the modelling cell. Kept as a list so df[stats_cols] selects columns.
stats_cols = ['hit_points','attack','defense','sp_attack','sp_defense','speed']

def plot_radar(pokemon_row):
    """Draw and show a closed radar (polar) chart of one row's stats.

    Args:
        pokemon_row: A single row (e.g. a pandas Series) that can be indexed
            with the module-level ``stats_cols`` list and with ``'name'``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    values = pokemon_row[stats_cols].values

    # One evenly spaced spoke per stat around the full circle.
    spoke_angles = np.linspace(0, 2*np.pi, len(stats_cols), endpoint=False).tolist()

    # Repeat the first point at the end so the outline closes on itself.
    closed_values = np.concatenate((values, [values[0]]))
    closed_angles = spoke_angles + spoke_angles[:1]

    fig, ax = plt.subplots(figsize=(6,6), subplot_kw={'polar': True})
    ax.plot(closed_angles, closed_values, 'o-', linewidth=2)
    ax.fill(closed_angles, closed_values, alpha=0.25)

    # Label each spoke (excluding the duplicated closing point) with its stat name.
    ax.set_thetagrids(np.degrees(spoke_angles), stats_cols)
    ax.set_title(pokemon_row['name'])
    plt.show()

# plot_radar expects a single row as a Series. .iloc[0] always returns one:
# it takes the first match if the name is duplicated and raises IndexError if
# there is no match. .squeeze() would silently return a DataFrame in both cases.
plot_radar(df.loc[df['name'] == 'Pikachu'].iloc[0])

#### MODELLING

In [None]:
# Fixed seed shared by the split and the forest so results are reproducible.
RSEED = 33

# Features are the stat columns; the target is the legendary flag.
X = df[stats_cols]
y = df['is_legendary']

# Hold out 20% of the rows for testing; stratify on y so both splits keep
# the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=RSEED,
)

# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(random_state=RSEED, n_estimators=100).fit(X_train, y_train)

# Predicted class labels, plus the predicted probability of the second class
# (column 1 of predict_proba) for each test row.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

# Save model with pickle
# open() does not create missing parent directories, so make sure the output
# folder exists first; exist_ok=True makes re-running the cell harmless.
os.makedirs('models', exist_ok=True)
with open('models/random_forest.pkl', 'wb') as f:
    pickle.dump(model, f)