# Model predicting the 2023 and 2024 GVA for every crop

In [1]:
import warnings
from glob import glob
import glob
import os
import numpy as np
import plotly.express as px
from IPython.display import VimeoVideo
from ipywidgets import Dropdown, FloatSlider, IntSlider, interact
from ipywidgets import interactive, IntSlider, Dropdown
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from ipywidgets import interactive
warnings.simplefilter(action="ignore", category=FutureWarning)

In [2]:
def wrangle(filepath):
    """Load one CSV table and zero-fill its missing or placeholder cells.

    Cells that are empty, or that consist solely of ``_`` or ``-``, are
    treated as missing and replaced with 0.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The table with every missing or placeholder cell set to 0.
    """
    # Declaring the placeholders as NA markers at parse time lets pandas infer
    # a numeric dtype for columns that hold numbers plus a few placeholders.
    # Replacing them after loading instead leaves such columns as object
    # dtype, mixing numeric strings such as "5" with the integer 0.
    df = pd.read_csv(filepath, na_values=["_", "-"])

    # Return a new frame rather than mutating in place
    return df.fillna(0)


data_pattern = r"C:\Users\fabiola\Desktop\Project 01\DATA\Table_*.csv"

# Collect the matching file paths. glob returns them in an order that depends
# on the file system, so sort them: code that picks a table by its position in
# `frames` then gets the same table on every run and on every machine.
files = sorted(glob.glob(data_pattern))

# One wrangled DataFrame per file, in the same order as `files`
frames = []

# Load each file through wrangle() and report its shape as a quick sanity check
for file in files:
    df = wrangle(file)
    print(f"Processing {file}, df_shape: {df.shape}")
    frames.append(df)

Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_1 (1).csv, df_shape: (162, 7)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_10 (1).csv, df_shape: (42, 5)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_11 (1).csv, df_shape: (30, 4)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_12 (1).csv, df_shape: (27, 3)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_13 (1).csv, df_shape: (30, 3)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_14 (1).csv, df_shape: (30, 6)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_2 (3).csv, df_shape: (180, 4)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_3 (1).csv, df_shape: (90, 3)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_4 (1).csv, df_shape: (98, 3)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_5 (1).csv, df_shape: (1677, 4)
Processing C:\Users\fabiola\Desktop\Project 01\DATA\Table_6 (3).csv, df_shape: (90, 3)
Processing C:\Users\fabiola\Deskto

In [3]:
# Pick the table to model by its position in `frames`. In the recorded run the
# ninth file processed (index 8) was "Table_4 (1).csv" with shape (98, 3); the
# position depends on the order in which the files were loaded.
df = frames[8]

In [4]:
# Function to print outputs in a bordered area
def print_output(title, content, *, width=50):
    """Print *content* under a centred title, framed by ``=`` rules.

    Parameters
    ----------
    title : str
        Heading, centred within ``width`` columns.
    content : object
        Body shown between the rules; handed to ``print`` unchanged.
    width : int, optional
        Length of the rules and of the centring field. Defaults to 50, the
        width that was previously hard-coded, so existing calls print
        exactly what they did before.
    """
    rule = "=" * width
    print(rule)
    # str.center returns a title longer than `width` unchanged
    print(title.center(width))
    print(rule)
    print(content)
    print(rule)


crop_column = 'Crop'

# columns for the model
features = ['Year', crop_column]
target = 'GVA_in_price'

# Hold back 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df[features], df[target], test_size=0.2, random_state=42
)

# Preprocessing and model in one pipeline, so prediction-time inputs go
# through exactly the same encoding as the training data.
pipeline = Pipeline([
    ('preprocessor', ColumnTransformer(
        transformers=[
            # Year is used as-is
            ('num', 'passthrough', ['Year']),
            # One indicator column per crop; a crop not seen during fitting
            # encodes as all zeros instead of raising
            ('cat', OneHotEncoder(handle_unknown='ignore'), [crop_column]),
        ],
        remainder='drop'
    )),
    ('regressor', LinearRegression())
])

# Train the model using the pipeline
pipeline.fit(X_train, y_train)

# Get model details. The coefficients follow the transformer order above:
# the first belongs to Year, the rest to the one-hot crop columns.
model = pipeline.named_steps['regressor']
intercept = round(model.intercept_, 1)
coefficients = [round(coef, 1) for coef in model.coef_]

# Print the model details within a bordered area
output_title = "Model Details"
output_content = f"Intercept: {intercept}\nCoefficients: {coefficients}"
print_output(output_title, output_content)

# Score the fitted model on the held-out rows. Without this step the test
# split is never used and nothing indicates how far the predictions can be
# trusted.
y_pred = pipeline.predict(X_test)
test_r2 = r2_score(y_test, y_pred)
test_rmse = mean_squared_error(y_test, y_pred) ** 0.5
print_output(
    "Hold-out Evaluation",
    f"R^2: {test_r2:.3f}\nRMSE: {test_rmse:.1f}",
)

# Controls for the prediction widget: a year slider limited to 2023-2024 and
# a dropdown listing the crops present in the training split, with the first
# of them preselected.
years_slider = IntSlider(
    value=2023,
    min=2023,
    max=2024,
    step=1,
    description='Year',
)
crop_choices = X_train[crop_column].unique()
crop_dropdown = Dropdown(
    options=crop_choices,
    value=crop_choices[0],
    description='Crop',
)

# Define the interactive function
def predict_gva(year, crop):
    """Predict the GVA for one crop in one year and print it in a bordered box.

    Parameters
    ----------
    year : int
        Year to predict for (supplied by the year slider).
    crop : object
        Crop to predict for (supplied by the crop dropdown); presumably a
        crop name string, confirm against the data.

    Returns
    -------
    None
        The prediction is printed via ``print_output``, not returned.
    """
    # Key the crop by `crop_column`, the column name the pipeline was fitted
    # on, rather than repeating the literal 'Crop'. Changing `crop_column`
    # then cannot leave this frame with a column the pipeline does not know.
    data_to_predict = pd.DataFrame({'Year': [year], crop_column: [crop]})
    predicted_gva = pipeline.predict(data_to_predict)
    output_title = "Predicted GVA"
    output_content = f"For {crop} in {year}: {round(predicted_gva[0], 1)}"
    print_output(output_title, output_content)

# Bind predict_gva to its two controls; the function's `year` and `crop`
# arguments are fed from the slider and the dropdown respectively.
interactive_plot = interactive(
    predict_gva,
    year=years_slider,
    crop=crop_dropdown,
)

# Leave the widget as the cell's final expression so the notebook renders it
interactive_plot


                  Model Details                   
Intercept: -203142533.7
Coefficients: [101054.2, -90334.5, -474416.5, 248204.1, 1559042.1, 199842.9, -644877.9, 659520.6, -561101.6, 755488.0, 298363.2, -555101.2, -830820.7, 25388.1, -589196.5]


interactive(children=(IntSlider(value=2023, description='Year', max=2024, min=2023), Dropdown(description='Cro…