# 1) Linear Regression Model to find MSP of Crops

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle

# Fit one linear trend per crop on the six historical season prices, report
# in-sample fit metrics, and project prices for 2024-2028.
#
# NOTE(review): the path below is absolute and machine-specific; the notebook
# only runs where this exact file exists. Consider a configurable data directory.
data = pd.read_csv(r"C:\Users\Mahendar\flaskproject\Crops_price\RS_Session_262_AU_641_A_i.csv")

# Column holding the crop name (the trailing space inside the parentheses is
# part of the header as referenced throughout this notebook).
CROP_COLUMN = 'Commodity (Variety )'
# Season columns used as regression targets, oldest first.
YEAR_COLUMNS = ['2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24']

# Remove the 'Other Crops' rows before modelling.
indexes_to_drop = data[data[CROP_COLUMN] == 'Other Crops'].index
data = data.drop(indexes_to_drop)

# Keep only the columns the model needs (same columns and order as before).
df = data[["Sl. No.", CROP_COLUMN, "Category", *YEAR_COLUMNS]].copy()

# Each season is represented by its starting calendar year (2018-19 -> 2018).
# Both arrays are loop-invariant, so they are built once.
X_train = np.array([2018, 2019, 2020, 2021, 2022, 2023]).reshape(-1, 1)
X_future = np.array([2024, 2025, 2026, 2027, 2028]).reshape(-1, 1)

# `model` always ends up bound to the most recently fitted estimator (or an
# unfitted one if no crop qualifies), which is what the legacy pickle contains.
model = LinearRegression()
# One fitted estimator per crop, so every crop's trend can be reloaded later.
models_by_crop = {}

results_list = []
train_results_list = []

for crop in df[CROP_COLUMN].unique():
    crop_data = df[df[CROP_COLUMN] == crop]
    Y_train = crop_data[YEAR_COLUMNS].values.flatten()

    # Exactly one price per season is required; duplicated crop rows would
    # yield more values than years.
    if len(Y_train) != len(YEAR_COLUMNS):
        print(f"Skipping {crop!r}: expected {len(YEAR_COLUMNS)} prices, found {len(Y_train)}")
        continue
    # The estimator rejects NaN targets, so skip the crop instead of aborting
    # the whole run.
    if pd.isna(Y_train).any():
        print(f"Skipping {crop!r}: missing price values")
        continue

    # A fresh estimator per crop keeps the fits independent and lets us store
    # each of them.
    model = LinearRegression()
    model.fit(X_train, Y_train)
    models_by_crop[crop] = model

    # In-sample predictions: the metrics below describe goodness of fit on the
    # training points, not performance on held-out data.
    Y_train_pred = model.predict(X_train)
    train_results_list.append({
        'Crop': crop,
        'MAE': mean_absolute_error(Y_train, Y_train_pred),
        'MSE': mean_squared_error(Y_train, Y_train_pred),
        'R²': r2_score(Y_train, Y_train_pred)
    })

    # Project the fitted trend onto the future years.
    Y_pred_future = model.predict(X_future)
    for year, predicted_price in zip(X_future.flatten(), Y_pred_future):
        results_list.append({
            'Crop': crop,
            'Year': year,
            'Predicted Price': predicted_price
        })

# Create DataFrames from the results lists
predictions_df = pd.DataFrame(results_list)
train_metrics_df = pd.DataFrame(train_results_list)

# Display the DataFrames
print(predictions_df)
print(train_metrics_df)

# Legacy artifact, kept for backward compatibility: it holds ONLY the model
# fitted on the last crop processed, not a model for every crop.
with open('linear_regression_model2.sav', 'wb') as file:
    pickle.dump(model, file)

# Complete artifact: dict mapping crop name -> fitted LinearRegression.
with open('linear_regression_models_by_crop.sav', 'wb') as file:
    pickle.dump(models_by_crop, file)

# Save predictions to CSV
predictions_df.to_csv('predictions2.csv', index=False)

# Save training metrics to CSV
train_metrics_df.to_csv('training_metrics.csv', index=False)


               Crop  Year  Predicted Price
0    Paddy (Common)  2024      2223.866667
1    Paddy (Common)  2025      2307.066667
2    Paddy (Common)  2026      2390.266667
3    Paddy (Common)  2027      2473.466667
4    Paddy (Common)  2028      2556.666667
..              ...   ...              ...
125            Jute  2024      5305.000000
126            Jute  2025      5574.285714
127            Jute  2026      5843.571429
128            Jute  2027      6112.857143
129            Jute  2028      6382.142857

[130 rows x 3 columns]
                               Crop         MAE            MSE        R²
0                    Paddy (Common)   24.933333     749.355556  0.964213
1                   Paddy (Grade A)   24.933333     749.355556  0.964213
2                    Jowar (Hybrid)   46.000000    2842.895238  0.956566
3                  Jowar (Maldandi)   51.209524    3440.117460  0.950244
4                             Bajra   18.571429     452.380952  0.987662
5                     

## Average training metrics (MAE, MSE, R²) across crops

In [5]:

# Average each per-crop training metric over all crops; the dict keys are the
# labels printed in the summary below.
total_accuracy = {
    f'Average {column}': train_metrics_df[column].mean()
    for column in ('MAE', 'MSE', 'R²')
}

print("Overall Model Accuracy Metrics:")
for label in total_accuracy:
    average = total_accuracy[label]
    print(f"{label}: {average:.4f}")

Overall Model Accuracy Metrics:
Average MAE: 87.6241
Average MSE: 25327.1874
Average R²: 0.9516


# 2) Random Forest Model to find MSP of Crops

In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle

# Fit one random-forest regressor per crop on the six historical season prices,
# report in-sample fit metrics, and produce predictions for 2024-2028.
#
# NOTE(review): the path below is absolute and machine-specific; the notebook
# only runs where this exact file exists. Consider a configurable data directory.
data = pd.read_csv(r"C:\Users\Mahendar\flaskproject\Crops_price\RS_Session_262_AU_641_A_i.csv")

# Column holding the crop name (the trailing space inside the parentheses is
# part of the header as referenced throughout this notebook).
CROP_COLUMN = 'Commodity (Variety )'
# Season columns used as regression targets, oldest first.
YEAR_COLUMNS = ['2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24']

# Remove the 'Other Crops' rows before modelling.
indexes_to_drop = data[data[CROP_COLUMN] == 'Other Crops'].index
data = data.drop(indexes_to_drop)

# Keep only the columns the model needs (same columns and order as before).
df = data[["Sl. No.", CROP_COLUMN, "Category", *YEAR_COLUMNS]].copy()

# Each season is represented by its starting calendar year (2018-19 -> 2018).
# Both arrays are loop-invariant, so they are built once.
X_train = np.array([2018, 2019, 2020, 2021, 2022, 2023]).reshape(-1, 1)
X_future = np.array([2024, 2025, 2026, 2027, 2028]).reshape(-1, 1)

# `model` always ends up bound to the most recently fitted estimator (or an
# unfitted one if no crop qualifies), which is what the legacy pickle contains.
model = RandomForestRegressor(n_estimators=100, random_state=42)
# One fitted estimator per crop, so every crop's model can be reloaded later.
models_by_crop = {}

results_list = []
train_results_list = []

for crop in df[CROP_COLUMN].unique():
    crop_data = df[df[CROP_COLUMN] == crop]
    Y_train = crop_data[YEAR_COLUMNS].values.flatten()

    # Exactly one price per season is required; duplicated crop rows would
    # yield more values than years.
    if len(Y_train) != len(YEAR_COLUMNS):
        print(f"Skipping {crop!r}: expected {len(YEAR_COLUMNS)} prices, found {len(Y_train)}")
        continue
    # The estimator rejects NaN targets, so skip the crop instead of aborting
    # the whole run.
    if pd.isna(Y_train).any():
        print(f"Skipping {crop!r}: missing price values")
        continue

    # A fresh estimator per crop (same seed, so results are unchanged) keeps
    # the fits independent and lets us store each of them.
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, Y_train)
    models_by_crop[crop] = model

    # In-sample predictions: the metrics below describe goodness of fit on the
    # training points, not performance on held-out data.
    Y_train_pred = model.predict(X_train)
    train_results_list.append({
        'Crop': crop,
        'MAE': mean_absolute_error(Y_train, Y_train_pred),
        'MSE': mean_squared_error(Y_train, Y_train_pred),
        'R²': r2_score(Y_train, Y_train_pred)
    })

    # NOTE(review): tree ensembles cannot extrapolate past the training years,
    # so every future year receives the same value per crop (visible in the
    # printed output). Treat these numbers as a flat estimate, not a trend.
    Y_pred_future = model.predict(X_future)
    for year, predicted_price in zip(X_future.flatten(), Y_pred_future):
        results_list.append({
            'Crop': crop,
            'Year': year,
            'Predicted Price': predicted_price
        })

# Create DataFrames from the results lists
predictions_df = pd.DataFrame(results_list)
train_metrics_df = pd.DataFrame(train_results_list)

# Display the DataFrames
print(predictions_df)
print(train_metrics_df)

# Legacy artifact, kept for backward compatibility: it holds ONLY the model
# fitted on the last crop processed, not a model for every crop.
with open('random_forest_model.sav', 'wb') as file:
    pickle.dump(model, file)

# Complete artifact: dict mapping crop name -> fitted RandomForestRegressor.
with open('random_forest_models_by_crop.sav', 'wb') as file:
    pickle.dump(models_by_crop, file)

# Save predictions to a model-specific file. This used to be 'predictions2.csv',
# which silently overwrote the linear-regression predictions written earlier in
# the notebook; the name now mirrors 'training_metrics_rf.csv'.
predictions_df.to_csv('predictions_rf.csv', index=False)

# Save training metrics to CSV
train_metrics_df.to_csv('training_metrics_rf.csv', index=False)


               Crop  Year  Predicted Price
0    Paddy (Common)  2024          2128.37
1    Paddy (Common)  2025          2128.37
2    Paddy (Common)  2026          2128.37
3    Paddy (Common)  2027          2128.37
4    Paddy (Common)  2028          2128.37
..              ...   ...              ...
125            Jute  2024          4930.50
126            Jute  2025          4930.50
127            Jute  2026          4930.50
128            Jute  2027          4930.50
129            Jute  2028          4930.50

[130 rows x 3 columns]
                               Crop         MAE            MSE        R²
0                    Paddy (Common)   21.645000     706.216383  0.966273
1                   Paddy (Grade A)   21.645000     706.216383  0.966273
2                    Jowar (Hybrid)   36.666667    1974.942933  0.969827
3                  Jowar (Maldandi)   37.750000    2198.213767  0.968206
4                             Bajra   27.416667    1100.291667  0.969992
5                     

## Average training metrics (MAE, MSE, R²) across crops

In [13]:
# Collapse the per-crop training metrics into one mean value per metric; the
# keys double as the labels shown in the printed summary.
summary_columns = ['MAE', 'MSE', 'R²']
column_means = [train_metrics_df[name].mean() for name in summary_columns]
total_accuracy = dict(
    zip((f'Average {name}' for name in summary_columns), column_means)
)

print("Overall Model Accuracy Metrics:")
for label, mean_value in total_accuracy.items():
    print(f"{label}: {mean_value:.4f}")

Overall Model Accuracy Metrics:
Average MAE: 74.5590
Average MSE: 15741.1273
Average R²: 0.9605
