<a href="https://colab.research.google.com/github/Amulyanrao7777/ML/blob/main/lab3_gradientboosting_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Load data
df = pd.read_csv('/content/PDO_wine_data_IT_FR.csv', delimiter=';', encoding='utf-8-sig')

# Feature engineering
df['Registration'] = pd.to_datetime(df['Registration'], errors='coerce')
df['Registration_Year'] = df['Registration'].dt.year

for col in ['Max_yield_hl', 'Max_yield_kg', 'Min_density']:
    df[col] = pd.to_numeric(df[col].replace('na', np.nan), errors='coerce').fillna(0)

df['Main_var_count'] = df['Main_var'].str.count(';') + 1
df['Second_var_count'] = df['Second_var'].str.count(';').fillna(0) + 1

le_country = LabelEncoder()
le_category = LabelEncoder()
df['Country_encoded'] = le_country.fit_transform(df['Country'])
df['Category_encoded'] = le_category.fit_transform(df['Category'])

# Prepare data
features = ['Max_yield_hl', 'Max_yield_kg', 'Min_density', 'Registration_Year',
            'Main_var_count', 'Second_var_count', 'Country_encoded', 'Category_encoded']
X = df[features]
y = LabelEncoder().fit_transform(df['Color'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
gb = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
gb.fit(X_train, y_train)

# Evaluate
y_pred = gb.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(classification_report(y_test, y_pred))

# Predict for new wine
def predict_wine(max_yield_hl, max_yield_kg, min_density, registration_year,
                 main_var_count, second_var_count, country, category):
    new_data = pd.DataFrame({
        'Max_yield_hl': [max_yield_hl],
        'Max_yield_kg': [max_yield_kg],
        'Min_density': [min_density],
        'Registration_Year': [registration_year],
        'Main_var_count': [main_var_count],
        'Second_var_count': [second_var_count],
        'Country_encoded': [le_country.transform([country])[0]],
        'Category_encoded': [le_category.transform([category])[0]]
    })
    prediction = gb.predict(new_data)[0]
    colors = ['Red', 'Ros√©', 'White']
    return colors[prediction]

# Example prediction
result = predict_wine(98, 14000, 2000, 2011, 2, 35, 'IT', 'Wine')
print(f"\nPredicted color: {result}")



Accuracy: 0.7996
              precision    recall  f1-score   support

           0       0.77      0.83      0.80       466
           1       0.45      0.23      0.30       101
           2       0.86      0.88      0.87       531

    accuracy                           0.80      1098
   macro avg       0.69      0.65      0.66      1098
weighted avg       0.78      0.80      0.79      1098


Predicted color: White
