In [1]:
#Import Data Manipulation Libraries
import pandas as pd
import numpy as np

#Import Visualization Libraries
import matplotlib.pyplot as plt
import seaborn as sns

#Import Warning Library
import warnings
warnings.filterwarnings('ignore')

#Import machine learning libraries
from sklearn.preprocessing import StandardScaler,RobustScaler,MinMaxScaler
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

# Logging setup: records at INFO level and above go to 'model.log'.
# filemode='w' opens the file for writing, so earlier contents are replaced
# rather than appended to.
import logging

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    filemode='w',
    filename='model.log',
    level=logging.INFO,
)

In [2]:
# Location of the crop-recommendation CSV (raw file served from GitHub).
url = "https://raw.githubusercontent.com/Saimehtre18/Crop_Recommendation_Model/refs/heads/main/Crop_Recommendation.csv"

# Download and parse the CSV into a DataFrame; the bare `df` on the last
# line makes the notebook render the frame as the cell output.
df = pd.read_csv(filepath_or_buffer=url)
df

Unnamed: 0,Nitrogen,Phosphorus,Potassium,Temperature,Humidity,pH_Value,Rainfall,Crop
0,90,42,43,20.879744,82.002744,6.502985,202.935536,Rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,Rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,Rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,Rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,Rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,Coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,Coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,Coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,Coffee


In [7]:
#Performing Univariate Analysis
# Builds one summary row (central tendency, spread, quartiles, shape) for
# every numeric column of `df` and collects the rows into the `stats` frame.

# NOTE(review): plain dicts keep insertion order, so OrderedDict is no longer
# used below; the import is kept only in case later cells still rely on it.
from collections import OrderedDict


def _first_mode(series):
    """Return the first value of ``series.mode()``, or NaN when there is none.

    ``Series.mode()`` is empty for a column with no non-null values, so
    indexing it unconditionally would raise IndexError.
    """
    modes = series.mode()
    return modes.values[0] if len(modes) else float('nan')


records = []

# Select columns by numeric dtype rather than by "not object": a
# `!= 'object'` test also admits category, datetime and dedicated string
# dtypes, on which mean()/var()/skew() raise.
for name in df.select_dtypes(include='number').columns:
    column = df[name]
    records.append({
        'feature': name,
        'Mean': column.mean(),
        'Median': column.median(),
        'Mode': _first_mode(column),
        'Variance': column.var(),
        'Standard Deviation': column.std(),
        'Minimum': column.min(),
        '25th Percentile': column.quantile(0.25),
        '50th Percentile': column.quantile(0.50),
        '75th Percentile': column.quantile(0.75),
        'Maximum': column.max(),
        'Skewness': column.skew(),
        'Kurtosis': column.kurtosis(),
    })

# One row per numeric feature; column order follows the dict key order above.
stats = pd.DataFrame(records)
stats

Unnamed: 0,feature,Mean,Median,Mode,Variance,Standard Deviation,Minimum,25th Percentile,50th Percentile,75th Percentile,Maximum,Skewness,Kurtosis
0,Nitrogen,50.551818,37.0,22.0,1362.889537,36.917334,0.0,21.0,37.0,84.25,140.0,0.509721,-1.05824
1,Phosphorus,53.362727,51.0,60.0,1088.06846,32.985883,5.0,28.0,51.0,68.0,145.0,1.010773,0.860279
2,Potassium,48.149091,32.0,17.0,2565.212869,50.647931,5.0,20.0,32.0,49.0,205.0,2.375167,4.449354
3,Temperature,25.616244,25.598693,8.825675,25.64155,5.063749,8.825675,22.769375,25.598693,28.561654,43.675493,0.184933,1.232555
4,Humidity,71.481779,80.473146,14.25804,495.677307,22.263812,14.25804,60.261953,80.473146,89.948771,99.981876,-1.091708,0.302134
5,pH_Value,6.46948,6.425045,3.504752,0.59898,0.773938,3.504752,5.971693,6.425045,6.923643,9.935091,0.283929,1.655581
6,Rainfall,103.463655,94.867624,20.211267,3020.424469,54.958389,20.211267,64.551686,94.867624,124.267508,298.560117,0.965756,0.607079
