In [None]:
!pip install pycaret

<center><h1 class="list-group-item list-group-item-success">CROP PRICE PREDICTION</h1></center>
<br>
<div>
    <div class="row justify-content-space-between">
        <div class="col-lg-6" style="padding:1em;height:100%">
            <img style="height:26em" src = "https://media.nationalgeographic.org/assets/photos/120/983/091a0e2f-b93d-481b-9a60-db520c87ec33.jpg" >
        </div>
        <div class="col-lg-6" style="padding:1em;">
            <div>
                <h3 style="font-size:1.4em">Context</h3>
                  <p>This dataset focuses on crops and records their price, area and other attributes, from which future price and yield can be predicted using the most informative attributes.</p>
            </div><br>
            <div>
                <h3 style="font-size:1.4em;margin:2em 0">Contents</h3>
                <ul style = "color:blue">
                    <li>Importing Packages</li>
                    <li>Importing Data</li>
                    <li>Analysing Data</li>
                    <li>Data Overview</li>
                    <li>Transforming data to required format</li>
                    <li>One Hot encoding</li>
                    <li>Model Comparison</li>
                    <li>Training Models</li>
                    <li>Evaluation Metrics</li>
                </ul>
            </div>
        </div>
    </div>
</div>



## Importing Packages

In [None]:
import pandas as pd
import numpy as np
import category_encoders as ce
from sklearn.model_selection import train_test_split
from pycaret.regression import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
import pandas_profiling as pp
import matplotlib.pyplot as plt
import seaborn as sns

## Importing Data

In [None]:
# Load the dataset from a CSV file; the path is relative to the notebook's
# working directory.
df = pd.read_csv("../input/crop-price-prediction/corn yield.csv")

## Analysing Data

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# For every column, print its name followed by the frequency of each value.
for column_name in df.columns:
    counts = df[column_name].value_counts()
    print(column_name, counts, sep="\n", end="\n")

In [None]:
# Columns excluded from the analysis; they are dropped from df in a later cell.
cols_to_be_removed = [
    "Program",
    "Week Ending",
    "Ag District",
    "Ag District Code",
    "County",
    "County ANSI",
    "Zip Code",
    "Region",
    "Watershed",
    "CV (%)",
    "Domain Category",
    "Commodity",
    "Geo Level",
    "watershed_code",
    "Domain",
]

In [None]:
# Remove the unwanted columns and rebind df to the reduced frame.
df = df.drop(columns=cols_to_be_removed)

In [None]:
# Build a pandas-profiling ProfileReport object from the current df.
prof = pp.ProfileReport(df)

## Data Overview

In [None]:
# Display the profiling report as this cell's output.
prof

## Transforming data to required format

In [None]:
# Replace missing "State ANSI" codes with 0.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the selected Series: that chained in-place form
# operates on an intermediate object, triggers a FutureWarning on recent
# pandas and leaves df unchanged once Copy-on-Write is enabled.
df["State ANSI"] = df["State ANSI"].fillna(0)

In [None]:
# Re-check missing values per column after the fill step.
df.isnull().sum()

In [None]:
# Print each remaining column's name followed by its value frequencies.
for column_name in df.columns:
    frequencies = df[column_name].value_counts()
    print(column_name, frequencies, sep="\n", end="\n")

In [None]:
# "Value" is stored as text with comma thousands separators: strip the commas
# and cast to float. "State ANSI" is cast to int.
df["Value"] = df["Value"].str.replace(',','').astype("float")
df["State ANSI"] = df["State ANSI"].astype("int")

## One Hot encoding

In [None]:
def one_hot_encoding(df,column):
    """One-hot encode ``column`` of ``df`` with category_encoders.

    Parameters
    ----------
    df : DataFrame to encode.
    column : column name (or names) handed to ``OneHotEncoder(cols=...)``.

    Returns
    -------
    The output of ``OneHotEncoder.fit_transform(df)``; ``return_df=True``
    requests a DataFrame, and ``use_cat_names=True`` asks the encoder to
    name the generated columns after the category values.
    """
    encoder = ce.OneHotEncoder(
        cols=column,
        use_cat_names=True,
        return_df=True,
    )
    return encoder.fit_transform(df)

In [None]:
# One-hot encode the categorical columns one after another, rebinding df to
# the encoded frame each time.
for categorical_column in ("Period", "Data Item", "State"):
    df = one_hot_encoding(df, categorical_column)

In [None]:
# Separate the prediction target ("Value") from the feature columns.
Y = df["Value"]
X = df.drop(columns=["Value"])

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=27,
)

## Model Comparison

In [None]:
# Split the full frame (target column included) 75/25 with a fixed seed;
# `train` is the frame handed to the PyCaret experiment.
train, test = train_test_split(
    df,
    random_state=27,
    test_size=0.25,
)

In [None]:
# Initialise the experiment on the `train` split with 'Value' as the target.
# NOTE(review): `setup` presumably comes from the `pycaret.regression` star
# import — confirm.
experiment = setup(
    data = train,
    target = 'Value',
    imputation_type='iterative'  # NOTE(review): presumably selects PyCaret's iterative imputer — confirm for the installed version
)

In [None]:
# NOTE(review): presumably PyCaret's compare_models, which trains and ranks
# the candidate regressors for the experiment configured above — confirm.
compare_models()

<b>Since the Random Forest Regressor gives a higher R<sup>2</sup> value than the other models, we choose it for training our model 🔥</b>

## Training Models

In [None]:
# Random forest with 100 trees; random_state is fixed so repeated runs build
# the same forest.
random_reg = RandomForestRegressor(n_estimators = 100, random_state = 0)

In [None]:
# Fit the forest on the training features and targets.
random_reg.fit(X_train, Y_train)

In [None]:
# Predict the target for the held-out test features.
Y_pred = random_reg.predict(X_test)

## Evaluation Metrics

In [None]:
# Mean squared error on the held-out test set.
# scikit-learn metrics take (y_true, y_pred), so the ground truth goes first.
# The value is the same either way for MSE; the order is kept canonical so
# every metric cell follows the same convention.
mse = mean_squared_error(Y_test, Y_pred)
mse

In [None]:
# Mean absolute error on the held-out test set.
# scikit-learn metrics take (y_true, y_pred), so the ground truth goes first.
# The value is the same either way for MAE; the order is kept canonical so
# every metric cell follows the same convention.
mae = mean_absolute_error(Y_test, Y_pred)
mae

In [None]:
# Coefficient of determination (R²) on the held-out test set.
# R² is NOT symmetric in its arguments: the ground truth must be passed first
# and the predictions second, otherwise the score is computed against the
# variance of the predictions instead of the actual values.
# The result is stored as `r2` so the imported `r2_score` function is not
# shadowed and this cell can be re-run without a TypeError.
r2 = r2_score(Y_test, Y_pred)
r2

In [None]:
# Scatter the predictions against the actual values with a fitted regression
# line. x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# as positional arguments, and the keyword form works on older versions too.
plot = sns.regplot(x=Y_pred, y=Y_test)
# The trailing semicolon suppresses the repr of the list returned by set().
plot.set(xlabel="Predicted",ylabel = "Actual");


### Insights
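&emsp;The high R<sup>2</sup> value (coefficient of determination) indicates that the model's predictions explain most of the variance in the actual values, and the regression plot shows a strong positive relationship between predicted and actual values.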


# Thank You 🤗
### I hope you had a good time reading my notebook. Please do support and comment! 😎