## Notebook for transforming the raw cpdata into processed data


In [None]:
import pandas as pd

In [2]:
# Load the raw crop and fertilizer tables.
# Forward slashes are used as the separator: they are accepted on Windows as well
# as POSIX systems and avoid backslash escape sequences inside the string literals
# (e.g. '\f' would be read as a form feed, '\R' and '\c' are invalid escapes).
# The paths are relative, so they are resolved against the current working directory.

crop_data_path = 'Phase 2 - Data preparation/Raw_data/crops.csv'
fertilizer_data_path = 'Phase 2 - Data preparation/Raw_data/fertilizer.csv'

crop = pd.read_csv(crop_data_path)
fert = pd.read_csv(fertilizer_data_path)

In [3]:
# Preview the first five rows of the freshly loaded crop table
crop.head(n=5)

Unnamed: 0,temperature,humidity,ph,rainfall,label
0,20.879744,82.002744,6.502985,202.935536,rice
1,21.770462,80.319644,7.038096,226.655537,rice
2,23.004459,82.320763,7.840207,263.964248,rice
3,26.491096,80.158363,6.980401,242.864034,rice
4,20.130175,81.604873,7.628473,262.71734,rice


In [4]:
# Preview the first five rows of the freshly loaded fertilizer table
fert.head(n=5)

Unnamed: 0.1,Unnamed: 0,Crop,N,P,K,pH
0,0,Rice,80,40,40,5.5
1,1,Jowar(Sorghum),80,40,40,5.5
2,2,Barley(JAV),70,40,45,5.5
3,3,Maize,80,40,20,5.5
4,4,Ragi( naachnnii),50,40,20,5.5


In [None]:
# Namespace the columns of each table: 'fert_' for the fertilizer table and
# 'crop_' for the crop table.
# A column that already starts with its prefix is kept as-is, so re-running this
# cell does not stack prefixes (e.g. 'fert_fert_...').
new_columns1 = [col if col.startswith('fert_') else 'fert_' + col for col in fert.columns]
new_columns2 = [col if col.startswith('crop_') else 'crop_' + col for col in crop.columns]

# Rename columns in the DataFrames (in place)
fert.columns = new_columns1
crop.columns = new_columns2

# Save both renamed tables as CSV files (relative paths, without the index column)
fert.to_csv('modified_fertilizer_dataset.csv', index=False)
crop.to_csv('modified_crop_dataset.csv', index=False)


In [5]:
# Function for normalising text labels (spaces removed, lower-cased)
def change_case(i):
    """Return ``i`` with every space character removed and converted to lower case.

    Args:
        i: The value to normalise. Strings are normalised; any other value
            (for example ``None`` or a float NaN standing in for a missing
            cell) is returned unchanged instead of raising ``AttributeError``.

    Returns:
        The normalised string, or ``i`` itself when it is not a string.
    """
    # Non-string values have no meaningful text form to normalise; pass them through
    # so that applying this function to a column with missing entries does not fail.
    if not isinstance(i, str):
        return i
    # Only the plain space character is removed; other whitespace is kept.
    return i.replace(" ", "").lower()

In [6]:
# Run change_case over the text columns of both tables, overwriting each column
# with its normalised values.
# NOTE(review): the table previews saved in this notebook list different column
# names than the ones used here -- confirm these names against the current CSVs.
for column in ('fert_name', 'fert_type', 'fert_toxicity', 'fert_solubility'):
    fert[column] = fert[column].apply(change_case)

for column in ('crop_name', 'crop_growth_rate', 'crop_type'):
    crop[column] = crop[column].apply(change_case)

In [None]:
# First five rows of the crop table after the steps above
crop.head(n=5)

In [None]:
# Last five rows of the crop table after the steps above
crop.tail(n=5)

In [10]:
# Distinct values found in the 'crop_name' column
crop_names = crop.loc[:, 'crop_name'].unique()
crop_names

array(['rice', 'wheat', 'mungbean', 'tea', 'millet', 'maize', 'lentil',
       'jute', 'coffee', 'cotton', 'groundnut', 'peas', 'rubber',
       'sugarcane', 'tobacco', 'kidneybeans', 'mothbeans', 'coconut',
       'blackgram', 'adzukibeans', 'pigeonpeas', 'chickpea', 'banana',
       'grapes', 'apple', 'mango', 'muskmelon', 'orange', 'papaya',
       'pomegranate', 'watermelon'], dtype=object)

In [11]:
# First five rows of the fertilizer table after the steps above
fert.head(n=5)

Unnamed: 0.1,Unnamed: 0,Crop,N,P,K,pH
0,0,rice,80,40,40,5.5
1,1,jowar(sorghum),80,40,40,5.5
2,2,barley(jav),70,40,45,5.5
3,3,maize,80,40,20,5.5
4,4,ragi(naachnnii),50,40,20,5.5


## Data Visualization for better feature extraction

In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Correlation heatmap of the numeric columns of each loaded table.
# NOTE(review): this cell used to read from `df`, a name that is not defined
# anywhere above in the notebook, so it failed on a fresh kernel. Both loaded
# tables are plotted here -- confirm which dataset was actually intended.
for label, frame in (('crop', crop), ('fert', fert)):
    # Select only numerical columns for correlation calculation
    numerical_df = frame.select_dtypes(include=['number'])

    plt.figure(figsize=(20, 20))  # Increase figure size for better scaling

    # Create heatmap with reduced annotation font size
    sns.heatmap(numerical_df.corr(), annot=True, annot_kws={"size": 8})
    plt.title('Correlation matrix: ' + label)

    plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of the mean of every numeric column, computed per crop name.
# numeric_only=True restricts the aggregation to numeric columns: text columns
# cannot be averaged, and recent pandas versions raise on them instead of
# silently dropping them.
mean_values = crop.groupby('crop_name').mean(numeric_only=True)

plt.figure(figsize=(12, 10))
sns.heatmap(mean_values, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Heatmap of Mean Values of crops')
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Draw one pairplot per table: fertilizer first, then crop
for frame in (fert, crop):
    sns.pairplot(frame)
    plt.show()


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Aliases for the two tables loaded earlier in the notebook (no new data is read here)
fertilizer_df = fert
crop_df = crop

# Covariance heatmap for each table: fertilizer first, then crop.
for frame in (fertilizer_df, crop_df):
    # Covariance is only defined for numeric data; restricting to numeric columns
    # keeps text columns from making the computation fail.
    covariance_matrix = frame.select_dtypes(include=['number']).cov()

    # Visualize the covariance matrix
    plt.figure(figsize=(12, 10))
    sns.heatmap(covariance_matrix, annot=True, fmt='.2f', cmap='coolwarm')
    plt.title('Covariance Matrix')
    plt.show()
