In [1]:
from zipfile import ZipFile
# Unpack the uploaded archive next to it; the context manager closes the
# archive handle once extraction finishes (or fails).
with ZipFile('/content/Data.zip', mode='r') as archive:
    archive.extractall(path='/content/')

## Install necessary libraries

### Subtask:
Install `geopandas` and its dependencies to handle geospatial data.


**Reasoning**:
Install geopandas using pip.



In [None]:
!pip install geopandas



**Reasoning**:
Verify the installation by importing geopandas.



In [2]:
import geopandas as gpd

**Reasoning**:
Load the CSV file into a pandas DataFrame and display the first few rows.



## Merge CSV files

### Subtask:
Load the two block-level CSV files (`Pampady.csv` and `Vazhoor.csv`) and merge them into a single pandas DataFrame.

**Reasoning**:
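Load each of the two CSV files into its own DataFrame using `pd.read_csv()`, keeping only the location and soil-property columns and upper-casing the column names so both frames share one schema. Then concatenate these DataFrames into a single DataFrame using `pd.concat()`.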

In [3]:
import pandas as pd

# Define the desired columns
# NOTE(review): this list is not referenced by the loading code in this cell,
# and 'pH'/'Ca'/'Mg' differ in case from the upper-cased frame columns
# ('PH'/'CA'/'MG') -- confirm before using it to index the loaded frames.
desired_columns = ['DISTRICT', 'BLOCK', 'PANCHAYAT', 'VILLAGE', 'LATITUDE', 'LONGITUDE', 'pH', 'P', 'K', 'Ca', 'Mg']

# Header names exactly as they are spelled in the source CSV files. Kept in one
# place so every file is read with the same selection and column order.
SOURCE_COLUMNS = ['District', 'Block', 'Panchayat', 'Village', 'Latitude', 'Longitude',
                  'pH', 'P', 'K', 'Ca', 'Mg']


def load_soil_csv(path, columns=None):
    """Read one soil-sample CSV, keep only `columns`, and upper-case the headers.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    columns : list of str, optional
        Header names to keep, spelled as in the file. Defaults to
        ``SOURCE_COLUMNS``.

    Returns
    -------
    pandas.DataFrame
        The selected columns in the order given, with upper-cased names.

    Raises
    ------
    ValueError
        Raised by ``pd.read_csv`` when a requested column is missing from the file.
    """
    columns = list(SOURCE_COLUMNS if columns is None else columns)
    # `usecols` skips unneeded columns at parse time but ignores the order of
    # the list, so re-select afterwards to enforce the requested order.
    frame = pd.read_csv(path, usecols=columns)[columns]
    # Standardize column names to uppercase so the frames can be concatenated.
    frame.columns = frame.columns.str.upper()
    return frame


# Load the two CSV files with an identical column selection.
df1 = load_soil_csv('/content/Data/Pampady.csv')
df2 = load_soil_csv('/content/Data/Vazhoor.csv')

print('df1:', df1.columns)
print('df2:',df2.columns)


df1: Index(['DISTRICT', 'BLOCK', 'PANCHAYAT', 'VILLAGE', 'LATITUDE', 'LONGITUDE',
       'PH', 'P', 'K', 'CA', 'MG'],
      dtype='object')
df2: Index(['DISTRICT', 'BLOCK', 'PANCHAYAT', 'VILLAGE', 'LATITUDE', 'LONGITUDE',
       'PH', 'P', 'K', 'CA', 'MG'],
      dtype='object')


In [4]:
# Stack the two frames row-wise; ignore_index=True discards the per-file row
# labels and numbers the combined rows 0..n-1.
merged_df = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)

# Preview the top of the combined table.
display(merged_df.head(5))

# Report (rows, columns) as a quick sanity check on the merge.
print(f"Shape of the merged dataframe: {merged_df.shape}")

Unnamed: 0,DISTRICT,BLOCK,PANCHAYAT,VILLAGE,LATITUDE,LONGITUDE,PH,P,K,CA,MG
0,Kottayam,Pampady,Akalakunnam,Akalakunnam,9.65674,76.63473,4.5,183.97,372.96,313.9,205.45
1,Kottayam,Pampady,Akalakunnam,Akalakunnam,9.65622,76.63101,5.6,57.31,268.24,1000.0,200.65
2,Kottayam,Pampady,Akalakunnam,Akalakunnam,9.65564,76.63068,5.9,79.07,565.26,949.95,254.48
3,Kottayam,Pampady,Akalakunnam,Akalakunnam,9.6567,76.63118,6.1,68.33,440.72,1000.0,404.48
4,Kottayam,Pampady,Akalakunnam,Akalakunnam,9.65708,76.63483,5.8,153.46,227.7,614.2,257.68


Shape of the merged dataframe: (3395, 11)


In [5]:
# Summary statistics for the merged table (count, mean, std, quartiles, range).
# NOTE(review): the recorded output has no CA column, so CA was presumably not
# read as a numeric dtype -- confirm and convert it if it should be numeric.
summary_stats = merged_df.describe()
summary_stats

Unnamed: 0,LATITUDE,LONGITUDE,PH,P,K,MG
count,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0
mean,9.575456,76.683919,5.707246,55.115389,371.905928,194.868006
std,0.053736,0.052295,0.792423,48.50259,218.507048,128.033999
min,9.4626,76.56717,0.2,1.3,16.02,0.3
25%,9.532814,76.64385,5.2,19.29,223.495,102.5
50%,9.57283,76.68612,5.7,38.84,327.26,173.83
75%,9.61043,76.727779,6.235,75.82,468.38,275.0
max,9.71166,76.789701,8.0,305.4,1200.0,500.0


In [6]:
# Save the merged table to the working directory.
# index=False: the index is just the 0..n-1 counter produced by the merge, and
# writing it would add an unnamed first column that comes back as "Unnamed: 0"
# when the file is read again.
merged_df.to_csv("Merged_Soil_Data.csv", index=False)