In [63]:
import pandas as pd

df = pd.read_csv("Pokemon.csv")
df.head(10)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
5,5,Charmeleon,Fire,,405,58,64,58,80,65,80,1,False
6,6,Charizard,Fire,Flying,534,78,84,78,109,85,100,1,False
7,6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False
8,6,CharizardMega Charizard Y,Fire,Flying,634,78,104,78,159,115,100,1,False
9,7,Squirtle,Water,,314,44,48,65,50,64,43,1,False


Name: Pokemon'un ismi (Bulbasaur, Ivysaur, vs.)
//Generation: Hangi nesil Pokemon'u (1, 2, 3... 7'ye kadar)
//Type 1: Ana tipi (Grass, Fire, Water, vs.) - Her pokemon'da var
//Type 2: İkinci tipi (Poison, Flying, Dragon, vs.) - Bazılarında yok (NaN)
//Legendary: Efsanevi pokemon mu? (True/False)

Güç İstatistikleri (Stats)

HP: Can/Sağlık puanı (ne kadar dayanır)
//Attack: Fiziksel saldırı gücü
//Defense: Fiziksel savunma
//Sp. Atk: Özel saldırı gücü (Special Attack)
//Sp. Def: Özel savunma (Special Defense)
//Speed: Hız (kim önce saldırır)
//Total: Tüm stats'ların toplamı (genel güç göstergesi)

In [None]:
df.isnull().sum()

In [None]:
df.info()

In [None]:
df.describe().T

In [67]:
df.shape

(800, 13)

In [68]:
df.columns

Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
       'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')

etiketli verimiz "Legendary"

In [None]:
from sklearn.preprocessing import LabelEncoder

def preprocessing(df):
    
    df = df.copy()
    df["Type 2"] = df["Type 2"].fillna("None")   #type2 değeri boşsa None ile doldur
     
    le_type1 = LabelEncoder()
    le_type2 = LabelEncoder() 
    
    df["Type 1"] = le_type1.fit_transform(df["Type 1"])
    df["Type 2"] = le_type2.fit_transform(df["Type 2"]) #kategorik değişkenleri label encoding ile dönüştürüyoruz
    
    
    #feature engineering
    df["Physical_Power"] = df["Attack"] + df["Defense"]                                     #toplam fiziksel güç   
    df["Special_Power"] = df["Sp. Atk"] + df["Sp. Def"]                                     #toplam özel güç
    df["Durability"] = df["HP"] + df["Defense"] + df["Sp. Def"]                             #genel dayanıklılık
    df["Attack_Power"] = df["Attack"] + df["Sp. Atk"]                                       #saldırı gücü
    df["Power_Category"] = pd.cut(df["Total"],
                            bins=[0,400,500,600,800], 
                            labels=[0,1,2,3]) #weak, average, strong, legendary
    df["Power_Category"] = df["Power_Category"].astype(int)                                 #güç kategorisi oluşturduk, total stats a göre
    df["Speed_Category"] = pd.cut(df['Speed'], 
                                  bins=[0, 50, 80, 120, 200], 
                                  labels=[0, 1, 2, 3])  #slow, average, fast, very fast
    df["Speed_Category"] = df["Speed_Category"].astype(int)                                 #burada da aynısını hız için yaptık
    
    df = df.drop(['#'], axis=1)
    
    return df



In [70]:
df = preprocessing(df)

In [None]:
df.head()

In [86]:
X = df.drop(["Legendary", "Name"], axis=1)
y = df["Legendary"]

In [87]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [88]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

In [89]:
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': Pipeline([
        ('scaler', StandardScaler()),
        ('svm', SVC(random_state=42))
    ]),
    'KNN': Pipeline([
        ('scaler', StandardScaler()),
        ('knn', KNeighborsClassifier())
    ]),
    'XGBoost': xgb.XGBClassifier(random_state=42),
    
    'LightGBM': lgb.LGBMClassifier(random_state=42, verbose=-1),
    
    'CatBoost': CatBoostClassifier(random_state=42, verbose=False)
}

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np

# Modelleri eğitme ve değerlendirme
results = {}
for name, model in models.items():
    print(f"\n{name} modeli eğitiliyor...")
    # Model eğitimi
    model.fit(X_train, y_train)

    # Tahminler
    y_pred = model.predict(X_test)

    # Sonuçları kaydetme
    results[name] = {
        'accuracy': accuracy_score(y_test, y_pred),
        'classification_report': classification_report(y_test, y_pred),
        'confusion_matrix': confusion_matrix(y_test, y_pred)
    }

    print(f"\n{name} Sonuçları:")
    print(f"Doğruluk: {results[name]['accuracy']:.4f}")
    print("\nSınıflandırma Raporu:")
    print(results[name]['classification_report'])

    # Karmaşıklık matrisini görselleştirme
    plt.figure(figsize=(8, 6))
    sns.heatmap(results[name]['confusion_matrix'], annot=True, fmt='d', cmap='Blues')
    plt.title(f'{name} Karmaşıklık Matrisi')
    plt.ylabel('Gerçek Değerler')
    plt.xlabel('Tahmin Edilen Değerler')
    plt.show()

In [95]:
# En iyi modeli bulma
best_model_name = max(results.items(), key=lambda x: x[1]['accuracy'])[0]
print(f"\nEn iyi model: {best_model_name} (Doğruluk: {results[best_model_name]['accuracy']:.4f})")



En iyi model: XGBoost (Doğruluk: 0.9750)
