In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression

# Read the raw price records from disk
file_path = "new.csv"
df = pd.read_csv(file_path)

# Tidy the headers ("_x0020_" is presumably an escaped space in the export)
# and discard every row that has at least one missing value
df.columns = [header.replace("_x0020_", "_") for header in df.columns]
df.dropna(inplace=True)

# Parse arrival dates and order the rows chronologically, so that the last
# row of any subset is also its most recent record
arrival_dates = pd.to_datetime(df['Arrival_Date'])
df['Arrival_Date'] = arrival_dates
df.sort_values(by='Arrival_Date', inplace=True)

# Express each arrival date as whole days elapsed since the earliest one
first_arrival = df['Arrival_Date'].min()
df['Day_Number'] = (df['Arrival_Date'] - first_arrival).dt.days

# Replace each categorical column with integer codes, keeping one fitted
# encoder per column so names can be mapped to codes later on
categorical_cols = ['State', 'District', 'Market', 'Commodity', 'Variety', 'Grade']
label_encoders = {column: LabelEncoder() for column in categorical_cols}

for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

# Model inputs: the encoded categoricals, the day counter and the price
# bounds; the target is the modal price
feature_cols = categorical_cols + ['Day_Number', 'Min_Price', 'Max_Price']
X = df[feature_cols]
y = df['Modal_Price']

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear regression on the training portion
lr_model = LinearRegression().fit(X_train, y_train)

# Prediction function
def predict_commodity_price(commodity_name: str) -> dict:
    """Forecast the modal price of a commodity from its most recent record.

    The last row for the commodity in the module-level ``df`` is used as a
    template (``df`` is expected to be sorted by arrival date, so the last
    row is the latest one). Only ``Day_Number`` is moved forward for each
    horizon; every other feature keeps the value of that latest record.

    Args:
        commodity_name: Commodity to look up. The title-cased form is tried
            first; if that is unknown, the name is matched against the
            known commodities ignoring case and surrounding whitespace.

    Returns:
        ``{"current_price": ..., "forecast": {...}}`` where ``forecast``
        maps "Today", "In 3 days", "Next week" and "Next 15 days" to the
        predicted modal price, or ``{"error": ...}`` when the commodity is
        unknown or has no rows in ``df``.
    """
    commodity_encoder = label_encoders['Commodity']
    known_commodities = list(commodity_encoder.classes_)

    user_commodity = commodity_name.title()
    if user_commodity not in known_commodities:
        # Title-casing misses names whose stored spelling is not title case
        # (e.g. a lower-case second word), so fall back to a comparison that
        # ignores case and surrounding whitespace.
        wanted = commodity_name.strip().casefold()
        user_commodity = next(
            (
                name
                for name in known_commodities
                if str(name).strip().casefold() == wanted
            ),
            None,
        )
        if user_commodity is None:
            return {"error": "Commodity not found. Please check spelling."}

    encoded_commodity = commodity_encoder.transform([user_commodity])[0]
    commodity_data = df[df['Commodity'] == encoded_commodity]

    if commodity_data.empty:
        return {"error": "No data available for this commodity."}

    latest_entry = commodity_data.iloc[-1]
    current_price = latest_entry['Modal_Price']

    future_days = [0, 3, 7, 15]
    forecast_labels = ["Today", "In 3 days", "Next week", "Next 15 days"]

    feature_names = list(X.columns)
    base_features = latest_entry[feature_names].to_dict()
    base_day = latest_entry['Day_Number']

    # Every horizon is an offset from the SAME base day. Incrementing a
    # shared counter in a loop would compound the offsets (0, 3, 10, 25).
    future_features = pd.DataFrame(
        [{**base_features, 'Day_Number': base_day + days} for days in future_days],
        columns=feature_names,
    )

    # One batched call; the frame carries the same column names and order
    # that the model was fitted with.
    predictions = lr_model.predict(future_features)
    forecasted_prices = dict(zip(forecast_labels, predictions))

    return {"current_price": current_price, "forecast": forecasted_prices}
