In [1]:
from zipfile import ZipFile
# Unpack the uploaded archive under /content/; the archive handle is closed
# automatically when the with-block exits.
with ZipFile('/content/Data.zip', mode='r') as archive:
    archive.extractall(path='/content/')

## Install necessary libraries

### Subtask:
Install `geopandas` and its dependencies to handle geospatial data.


**Reasoning**:
Install geopandas using pip.



In [2]:
!pip install geopandas



**Reasoning**:
Verify the installation by importing geopandas.



In [3]:
import geopandas as gpd

**Reasoning**:
Load the CSV file into a pandas DataFrame and display the first few rows.



## Merge CSV files

### Subtask:
Load five specific CSV files and merge them into a single pandas DataFrame.

**Reasoning**:
Load each of the five CSV files into separate DataFrames using `pd.read_csv()`. Then, concatenate these DataFrames into a single DataFrame using `pd.concat()`.

In [8]:
import pandas as pd

# Columns to keep from every source file, spelled as they appear in the CSV headers.
source_columns = ['District', 'Block', 'Panchayat', 'Village', 'Latitude', 'Longitude',
                  'pH', 'P', 'K', 'Ca', 'Mg']

# Define the desired columns: the upper-cased names every loaded frame ends up with.
# (The previous mixed-case list matched neither the raw headers nor the loaded frames.)
desired_columns = [name.upper() for name in source_columns]


def load_soil_csv(path):
    """Read one soil CSV and return only the columns of interest, upper-cased.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The columns listed in ``source_columns``, in that order, with every
        column name converted to upper case.

    Raises
    ------
    KeyError
        If the file lacks any of the columns in ``source_columns``.
    """
    selected = pd.read_csv(path)[source_columns]
    # Standardize column names to uppercase so all frames share one schema before concatenating
    return selected.rename(columns=str.upper)


# Load the five CSV files; the names df1..df5 are kept because later cells refer to them.
df1 = load_soil_csv('/content/Data/Chelannur.csv')
df2 = load_soil_csv('/content/Data/Kannur_Ezhome.csv')
df3 = load_soil_csv('/content/Data/Nedumangadu.csv')
df4 = load_soil_csv('/content/Data/Pampady.csv')
df5 = load_soil_csv('/content/Data/Vazhoor.csv')

# Show each frame's columns to confirm they are identical
for label, frame in (('df1', df1), ('df2', df2), ('df3', df3), ('df4', df4), ('df5', df5)):
    print(f'{label}:', frame.columns)



df1: Index(['DISTRICT', 'BLOCK', 'PANCHAYAT', 'VILLAGE', 'LATITUDE', 'LONGITUDE',
       'PH', 'P', 'K', 'CA', 'MG'],
      dtype='object')
df2: Index(['DISTRICT', 'BLOCK', 'PANCHAYAT', 'VILLAGE', 'LATITUDE', 'LONGITUDE',
       'PH', 'P', 'K', 'CA', 'MG'],
      dtype='object')
df3: Index(['DISTRICT', 'BLOCK', 'PANCHAYAT', 'VILLAGE', 'LATITUDE', 'LONGITUDE',
       'PH', 'P', 'K', 'CA', 'MG'],
      dtype='object')
df4: Index(['DISTRICT', 'BLOCK', 'PANCHAYAT', 'VILLAGE', 'LATITUDE', 'LONGITUDE',
       'PH', 'P', 'K', 'CA', 'MG'],
      dtype='object')
df5: Index(['DISTRICT', 'BLOCK', 'PANCHAYAT', 'VILLAGE', 'LATITUDE', 'LONGITUDE',
       'PH', 'P', 'K', 'CA', 'MG'],
      dtype='object')


In [18]:
# Stack the five per-file frames vertically; ignore_index=True replaces their
# individual row labels with one fresh 0..n-1 index.
merged_df = pd.concat(
    objs=[df1, df2, df3, df4, df5],
    ignore_index=True,
)

# Preview the top rows of the combined frame
display(merged_df.head())

# Report (rows, columns) as a quick check on the merge
print(f"Shape of the merged dataframe: {merged_df.shape}")

Unnamed: 0,DISTRICT,BLOCK,PANCHAYAT,VILLAGE,LATITUDE,LONGITUDE,PH,P,K,CA,MG
0,Kozhikode,Chelannur,Chelannur,Chelannur,11.354,75.779,6.3,3.36,262.86,442.65,57.45
1,Kozhikode,Chelannur,Chelannur,Chelannur,11.352,75.78,6.3,2.24,264.43,373.75,28.4
2,Kozhikode,Chelannur,Chelannur,Chelannur,11.358,75.78,5.9,4.48,171.36,182.95,21.81
3,Kozhikode,Chelannur,Chelannur,Chelannur,11.362,75.779,6.3,4.48,337.12,274.25,31.9
4,Kozhikode,Chelannur,Chelannur,Chelannur,11.364,75.781,7.2,12.32,972.94,213.25,41.5


Shape of the merged dataframe: (6830, 11)


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for the merged frame.
# NOTE(review): the recorded output has no CA column, which suggests CA was not
# parsed as a numeric dtype -- check merged_df.dtypes and the raw CA values.
merged_df.describe()

Unnamed: 0,LATITUDE,LONGITUDE,PH,P,K,MG
count,6830.0,6830.0,6830.0,6830.0,6830.0,6830.0
mean,10.164483,76.332374,5.978571,48.968876,317.41214,168.437465
std,1.198596,0.614294,0.801064,45.127264,195.896644,118.971234
min,8.540138,75.2305,0.2,1.12,10.86,0.3
25%,9.520133,75.802,5.4,18.02,185.5975,82.7875
50%,9.601685,76.654433,6.0,32.48,269.975,138.52
75%,11.383,76.737877,6.5,62.54,398.215,239.58
max,12.107792,77.048711,8.0,313.6,1200.0,500.0


In [20]:
# Save the merged data as a CSV file in the current working directory.
# index=False leaves out the row index, which is only the 0..n-1 counter created by
# ignore_index=True and would otherwise be written as an unnamed leading column.
merged_df.to_csv("Merged_Soil_Data.csv", index=False)