In [1]:
import pandas as pd

In [8]:
# Absolute Windows path of the input CSV.
# A raw string keeps backslashes literal, so each separator is written once;
# doubling them inside r"..." would embed two backslashes per separator.
file_path = r"C:\Users\91987\OneDrive\Desktop\climate_distortion_with_region.csv"
# The file is decoded as ISO-8859-1 instead of the pandas default (UTF-8).
df = pd.read_csv(file_path, encoding='ISO-8859-1')
# Plain-text preview of the first rows.
print(df.head())

         Date Country         Region  Crop_Type  Average_Temperature_C  \
0  15-01-2001   India    West Bengal       Corn                   1.55   
1  15-01-2024   China          North       Corn                   3.23   
2  15-01-2001  France  Ile-de-France      Wheat                  21.11   
3  15-01-2001  Canada       Prairies     Coffee                  27.85   
4  15-01-1998   India     Tamil Nadu  Sugarcane                   2.19   

   Total_Precipitation_mm  CO2_Emissions_MT  Crop_Yield_MT_per_HA  \
0                  447.06             15.22                 1.737   
1                 2913.57             29.82                 1.737   
2                 1301.74             25.75                 1.719   
3                 1154.36             13.91                 3.890   
4                 1627.48             11.81                 1.080   

   Extreme_Weather_Events  Irrigation_Access_%  Pesticide_Use_KG_per_HA  \
0                       8                14.54                   

In [9]:
# Tally the missing cells in every column, then keep only the columns that
# actually contain at least one missing value.
null_counts = df.isna().sum()
columns_with_nulls = null_counts.loc[null_counts.gt(0)]
# Number of affected columns.
num_columns_with_nulls = columns_with_nulls.shape[0]

In [10]:
# Report how many columns have missing values, followed by the per-column
# counts; each item is written on its own line.
print(
    f"Number of columns with NULL values: {num_columns_with_nulls}",
    "Columns with NULL values:",
    columns_with_nulls,
    sep="\n",
)

Number of columns with NULL values: 2
Columns with NULL values:
Soil_Health_Index         9
Adaptation_Strategies    12
dtype: int64


In [14]:
# Fill every column that reported missing values: text-like columns receive
# their most frequent value, every other column receives its mean.
for column in columns_with_nulls.index:
    series = df[column]
    # Object, string-extension and categorical columns cannot be averaged, so
    # all three are routed to mode imputation. Testing for 'object' alone
    # would send the other two to .mean(), which raises a TypeError.
    is_categorical = series.dtype == 'object' or isinstance(
        series.dtype, (pd.StringDtype, pd.CategoricalDtype)
    )
    if is_categorical:  # Categorical column
        modes = series.mode()
        if modes.empty:
            # The column has no non-null value, so there is nothing to
            # impute with; leave it untouched instead of failing on the
            # empty mode result.
            continue
        mode_value = modes.iloc[0]  # first (smallest) of the most frequent values
        df[column] = series.fillna(mode_value)  # Assign back to the column
    else:  # Numerical column
        mean_value = series.mean()
        df[column] = series.fillna(mean_value)  # Assign back to the column

In [13]:
# Recount the missing cells per column and print the counts under a heading.
null_counts_after = df.isna().sum()
print("NULL values after imputation:", null_counts_after, sep="\n")

NULL values after imputation:
Date                           0
Country                        0
Region                         0
Crop_Type                      0
Average_Temperature_C          0
Total_Precipitation_mm         0
CO2_Emissions_MT               0
Crop_Yield_MT_per_HA           0
Extreme_Weather_Events         0
Irrigation_Access_%            0
Pesticide_Use_KG_per_HA        0
Fertilizer_Use_KG_per_HA       0
Soil_Health_Index              0
Adaptation_Strategies          0
Economic_Impact_Million_USD    0
dtype: int64


In [16]:
# Rows whose Country value is exactly 'Canada'.
canada_data = df.loc[df['Country'].eq('Canada')]

In [17]:
# Distinct Region values among the Canadian rows, in order of first appearance.
unique_regions = canada_data.loc[:, 'Region'].unique()

In [18]:
# How many distinct regions were found.
num_unique_regions = unique_regions.shape[0]

In [19]:
# Show the region count first, then the region names themselves.
canada_region_headline = f"Unique regions recorded for Canada: {num_unique_regions}"
print(canada_region_headline)
print("Regions:", unique_regions, sep=" ")

Unique regions recorded for Canada: 4
Regions: ['Prairies' 'Quebec' 'Ontario' 'British Columbia']


In [22]:
# Replace the Date column with parsed timestamps; the values are read as
# day-month-year (e.g. 15-01-2001).
raw_dates = df['Date']
df['Date'] = pd.to_datetime(raw_dates, format='%d-%m-%Y')

In [23]:
# Rows whose parsed Date falls in calendar year 2023.
data_2023 = df.loc[df['Date'].dt.year.eq(2023)]

In [25]:
# Locate the single 2023 record with the largest CO2_Emissions_MT value and
# read its country and emission figure. This is a per-record maximum; no
# aggregation across a country's rows is performed.
max_emissions_label = data_2023['CO2_Emissions_MT'].idxmax()
max_emissions_row = data_2023.loc[max_emissions_label]
max_emissions = max_emissions_row['CO2_Emissions_MT']
max_country = max_emissions_row['Country']

In [26]:
# Locate the single 2023 record with the smallest CO2_Emissions_MT value and
# read its country and emission figure. This is a per-record minimum; no
# aggregation across a country's rows is performed.
min_emissions_label = data_2023['CO2_Emissions_MT'].idxmin()
min_emissions_row = data_2023.loc[min_emissions_label]
min_emissions = min_emissions_row['CO2_Emissions_MT']
min_country = min_emissions_row['Country']

In [27]:
# Report the highest and lowest 2023 emission records, one per line.
print(
    f"Country with the most CO2 emissions in 2023: {max_country} with {max_emissions} MT",
    f"Country with the least CO2 emissions in 2023: {min_country} with {min_emissions} MT",
    sep="\n",
)

Country with the most CO2 emissions in 2023: China with 29.98 MT
Country with the least CO2 emissions in 2023: USA with 0.71 MT


In [28]:
# Rows whose Country value is exactly 'USA'.
usa_data = df.loc[df['Country'].eq('USA')]

In [29]:
# Mean Average_Temperature_C for each USA region (result is indexed by Region).
average_temperatures = (
    usa_data
    .groupby('Region')['Average_Temperature_C']
    .mean()
)

In [30]:
# Lowest regional mean temperature, and the region label it belongs to.
coolest_temperature = average_temperatures.min()
coolest_region = average_temperatures.idxmin()

In [31]:
# Report the coolest USA region; the temperature is shown with two decimals.
coolest_region_message = f"The coolest region in the USA is '{coolest_region}' with an average temperature of {coolest_temperature:.2f}°C."
print(coolest_region_message)

The coolest region in the USA is 'West' with an average temperature of 15.24°C.


In [32]:
# Rows that are both in the 'Midwest' region and in the 'USA'.
is_midwest = df['Region'].eq('Midwest')
is_usa = df['Country'].eq('USA')
midwest_data = df.loc[is_midwest & is_usa]

In [33]:
# Total Economic_Impact_Million_USD per crop type across the USA Midwest rows.
economic_impact_by_crop = (
    midwest_data
    .groupby('Crop_Type')['Economic_Impact_Million_USD']
    .sum()
)

In [34]:
# Largest summed economic impact, and the crop type it belongs to.
highest_impact_value = economic_impact_by_crop.max()
highest_impact_crop = economic_impact_by_crop.idxmax()

In [35]:
# Report the Midwest crop with the largest summed economic impact; the value
# is shown with two decimals.
highest_impact_message = f"The crop type from the Midwest region in the USA with the highest historical economic impact is '{highest_impact_crop}' with an economic impact of ${highest_impact_value:.2f} million USD."
print(highest_impact_message)

The crop type from the Midwest region in the USA with the highest historical economic impact is 'Coffee' with an economic impact of $21970.56 million USD.
