**1. Importing the Libraries**
 

In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the dataset CSV into a DataFrame.
# NOTE(review): the path is absolute and machine-specific; adjust it before running elsewhere.
weather_data  = pd.read_csv('C:/SAI/Semesters/6th Semester/ML Lab/Lab 2/Bias_correction_ucl.csv')

In [None]:
# Bare expression: when run as the last line of a notebook cell this renders the DataFrame.
weather_data 

#  Data Exploration:  
 - Display the dimensions, shape, size, and attribute types.
 - Display the first few rows.
 - Provide summary statistics for key features.
 - Identify and handle any missing values.


In [None]:
def data_exploration(weather_data):
    """Print an overview of the dataset and return a cleaned copy of it.

    The overview (number of dimensions, shape, size, column dtypes, first
    rows, summary statistics and per-column missing-value counts) describes
    the frame exactly as it was passed in, i.e. before any cleaning.

    Cleaning drops every row that contains a missing value and then keeps
    only rows where both ``Present_Tmax`` and ``Present_Tmin`` are strictly
    greater than zero.

    Args:
        weather_data: DataFrame containing at least the ``Present_Tmax`` and
            ``Present_Tmin`` columns. It is not modified.

    Returns:
        A new DataFrame holding the cleaned rows; the original index labels
        are preserved.
    """
    print("\n Data Exploration:")
    print("Dimension: ", weather_data.ndim)
    print("Shape: ", weather_data.shape)
    print("Size: ", weather_data.size)
    print("Attributes types:\n ", weather_data.dtypes)
    print("First few rows:\n ", weather_data.head())
    print("Summary statistics:\n ", weather_data.describe())
    # Report the gaps before they are removed so the clean-up step is visible.
    print("Missing values per column:\n ", weather_data.isna().sum())

    complete_rows = weather_data.dropna()
    positive_temps = (
        (complete_rows['Present_Tmax'] > 0) & (complete_rows['Present_Tmin'] > 0)
    )
    # Return an explicit copy: callers add or overwrite columns on the result,
    # which on a boolean-filtered slice triggers SettingWithCopyWarning.
    return complete_rows[positive_temps].copy()

# Temporal Analysis:  
- Explore the temporal aspect of the dataset. Are there any trends or patterns over time? 
- Visualize the variation in temperature (Present_Tmax and Present_Tmin) over different years.

In [None]:
def temporal_analysis(weather_data):
    """Plot ``Present_Tmax`` and ``Present_Tmin`` against the observation date.

    The frame is first passed through ``data_exploration`` (which prints its
    own overview and returns the cleaned rows). The ``Date`` column of that
    result is parsed with ``pd.to_datetime`` and used as the index of the
    frame handed to ``sns.lineplot``.

    Args:
        weather_data: DataFrame with ``Date``, ``Present_Tmax`` and
            ``Present_Tmin`` columns. It is not modified.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    print("\nTemporal Analysis")
    print("Visualize the variation in temperature over different years")

    cleaned = data_exploration(weather_data)

    # assign() + set_index() build a new frame instead of writing a column
    # into the filtered result, which avoids pandas' chained-assignment
    # warning and leaves `cleaned` untouched.
    by_date = cleaned.assign(Date=pd.to_datetime(cleaned['Date'])).set_index('Date')

    plt.figure(figsize=(12, 8))
    sns.lineplot(data=by_date[['Present_Tmax', 'Present_Tmin']])
    plt.title('Temperature Variation Over Different Years')
    plt.xlabel('Date')
    plt.ylabel('Temperature (°C)')
    plt.show()



# Geographical Analysis:  
- Explore the geographical features (lat, lon, DEM, Slope). 
- Visualize the distribution of weather stations on a map. 
- Analyze how elevation (DEM) and slope impact temperature. 

In [None]:
def geographical_analysis(df):
    """Interactively run one of three geographical analyses on ``df``.

    The user is prompted for an option:

    1. Print the first rows and summary statistics of the ``lat``, ``lon``,
       ``DEM`` and ``Slope`` columns.
    2. Scatter-plot ``lon`` against ``lat``.
    3. Draw two side-by-side scatter plots of ``Present_Tmax`` against
       ``DEM`` and against ``Slope``.

    Input that is not an integer, or an integer outside 1-3, prints a message
    and returns without doing anything else.

    Args:
        df: DataFrame containing the columns named above.

    Returns:
        None.
    """
    print("\nGeographical Analysis:")
    print("1. Explore the geographical features (lat, lon, DEM, Slope)")
    print("2. Visualize the distribution of weather stations on a map")
    print("3. Analyze how elevation (DEM) and slope impact temperature")
    try:
        option = int(input("Select the option (1-3): "))
    except ValueError:
        # A non-numeric entry would otherwise propagate and abort the caller.
        print("Invalid choice. Please enter a number between 1 and 3.")
        return

    if option == 1:
        print("\nGeographical Features:")
        geographical_features = ['lat', 'lon', 'DEM', 'Slope']
        print(df[geographical_features].head())
        print("\nGeographical Features Distribution:")
        print(df[geographical_features].describe())

    elif option == 2:
        # Visualize the distribution of weather stations on a map
        plt.figure(figsize=(10, 8))
        plt.scatter(df['lon'], df['lat'], c='blue', marker='o', alpha=0.6)
        plt.xlabel('Longitude')
        plt.ylabel('Latitude')
        plt.title('Distribution of Weather Stations on Map')
        plt.show()

    elif option == 3:
        # Analyze how elevation (DEM) and slope impact temperature
        plt.figure(figsize=(14, 6))

        # Scatter plot for elevation vs. temperature
        plt.subplot(1, 2, 1)
        sns.scatterplot(x='DEM', y='Present_Tmax', data=df)
        plt.xlabel('Elevation (DEM)')
        plt.ylabel('Present_Tmax')
        plt.title('Impact of Elevation on Maximum Temperature')

        # Scatter plot for slope vs. temperature
        plt.subplot(1, 2, 2)
        sns.scatterplot(x='Slope', y='Present_Tmax', data=df)
        plt.xlabel('Slope')
        plt.ylabel('Present_Tmax')
        plt.title('Impact of Slope on Maximum Temperature')

        plt.tight_layout()
        plt.show()

    else:
        # Previously an out-of-range number fell through silently.
        print("Invalid choice. Please enter a number between 1 and 3.")

# Correlation Analysis:  
 - Calculate and visualize the correlation matrix for numerical features. 
 - Identify highly correlated features.

In [None]:
def correlational_analysis(weather_data):
    """Show an annotated heatmap of the correlation matrix of ``weather_data``.

    The ``Date`` column is dropped first; the pairwise correlations of the
    remaining columns are computed with ``DataFrame.corr()`` and drawn with
    two-decimal annotations.

    Args:
        weather_data: DataFrame that contains a ``Date`` column.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Everything except the date takes part in the correlation.
    corr = weather_data.drop(labels=['Date'], axis='columns').corr()

    plt.figure(figsize=(14, 10))
    sns.heatmap(data=corr, cmap='coolwarm', annot=True, fmt=".2f")
    plt.title("Correlation Matrix")
    plt.show()


# Feature-specific Analysis:  
- Explore the LDAPS features (RH, Tmax_lapse, Tmin_lapse, WS, LH, CC, PPT). 
- Visualize the relationships between LDAPS features and other key variables. 
- Analyze the impact of cloud cover and precipitation on temperature. 
- Plot an individual box plot for every attribute except the date attribute.

In [None]:

def feature_specific_analysis(df):
    """Interactively explore the LDAPS forecast features of ``df``.

    The user is prompted for an option:

    1. Print the first rows and summary statistics of the LDAPS columns, show
       their correlation heatmap, then draw a grid of scatter plots of every
       LDAPS column against ``Present_Tmax``, ``Present_Tmin``, ``LDAPS_WS``
       and ``LDAPS_LH``.
    2. Draw one box plot over all non-LDAPS columns, then one scatter plot per
       (cloud-cover column, temperature column) pair followed by one per
       (precipitation column, temperature column) pair.

    Input that is not an integer, or an integer other than 1 or 2, prints a
    message and returns without doing anything else.

    Args:
        df: DataFrame containing the LDAPS columns listed in the body plus
            ``Present_Tmax`` and ``Present_Tmin``.

    Returns:
        None.
    """
    print("\nFeature Specific Analysis:")
    print("1. Explore the LDAPS features and Visualize the relationships")
    print("2. Analyze the impact of cloud cover and precipitation on temperature")
    try:
        option = int(input("Select the option (1-2): "))
    except ValueError:
        # A non-numeric entry would otherwise propagate and abort the caller.
        print("Invalid choice. Please enter 1 or 2.")
        return

    ldaps_features = ['LDAPS_RHmin', 'LDAPS_RHmax', 'LDAPS_Tmax_lapse', 'LDAPS_Tmin_lapse',
                      'LDAPS_WS', 'LDAPS_LH', 'LDAPS_CC1', 'LDAPS_CC2', 'LDAPS_CC3', 'LDAPS_CC4',
                      'LDAPS_PPT1', 'LDAPS_PPT2', 'LDAPS_PPT3', 'LDAPS_PPT4']

    if option == 1:
        print("\nLDAPS Features:")
        print(df[ldaps_features].head())
        print("\nLDAPS Features Distribution:")
        print(df[ldaps_features].describe())
        print("\nLDAPS Features Correlation Matrix:")
        plt.figure(figsize=[14, 18])
        sns.heatmap(df[ldaps_features].corr(), annot=True, cmap='viridis', fmt=".2f", linewidth=1)
        plt.title("LDAPS Features Correlation Matrix")
        plt.show()

        key_variables = ['Present_Tmax', 'Present_Tmin', 'LDAPS_WS', 'LDAPS_LH']
        n_rows = len(ldaps_features)
        n_cols = len(key_variables)

        # One row of axes per LDAPS feature: scale the figure height with the
        # row count so titles and axis labels of adjacent rows do not overlap
        # (a fixed 12-inch height left well under an inch per row).
        plt.figure(figsize=[18, 3 * n_rows])
        for i, feature in enumerate(ldaps_features):
            for j, var in enumerate(key_variables):
                plt.subplot(n_rows, n_cols, i * n_cols + j + 1)
                plt.scatter(df[var], df[feature], alpha=0.6)
                plt.xlabel(var)
                plt.ylabel(feature)
                plt.title(f"Relationship between {var} and {feature}")
        plt.tight_layout()
        plt.show()
    elif option == 2:
        all_attributes = df.columns.difference(ldaps_features)
        plt.figure(figsize=[14, 10])
        sns.boxplot(data=df[all_attributes], palette='viridis')
        plt.title("Box Plot for All Attributes Except LDAPS Features")
        plt.show()

        temp_variables = ['Present_Tmax', 'Present_Tmin']
        cloud_variables = ['LDAPS_CC1', 'LDAPS_CC2', 'LDAPS_CC3', 'LDAPS_CC4']
        ppt_variables = ['LDAPS_PPT1', 'LDAPS_PPT2', 'LDAPS_PPT3', 'LDAPS_PPT4']

        _scatter_against_temperature(df, cloud_variables, temp_variables)
        _scatter_against_temperature(df, ppt_variables, temp_variables)
    else:
        # Previously an out-of-range number fell through silently.
        print("Invalid choice. Please enter 1 or 2.")


def _scatter_against_temperature(df, predictors, temp_variables):
    """Show one scatter plot per (temperature column, predictor column) pair."""
    for temp_var in temp_variables:
        for predictor in predictors:
            plt.scatter(df[predictor], df[temp_var])
            plt.xlabel(predictor)
            plt.ylabel(temp_var)
            plt.title(f"Impact of {predictor} on {temp_var}")
            plt.show()

# Box plot for All Attributes

In [None]:
def box_plot_all_attributes(dataset):
    """Show a separate horizontal box plot for every column except ``Date``.

    Columns are visited in the order returned by
    ``dataset.columns.difference(['Date'])``; each gets its own 8x6 figure.

    Args:
        dataset: DataFrame whose non-``Date`` columns are plotted.

    Returns:
        None. Each figure is displayed with ``plt.show()``.
    """
    box_plot_attributes = dataset.columns.difference(['Date'])

    # No figure is created up front: every attribute opens its own figure
    # below, so an extra one here would only be displayed as an empty canvas.
    for attribute in box_plot_attributes:
        plt.figure(figsize=(8, 6))
        sns.boxplot(x=attribute, data=dataset)
        plt.title(f'Box Plot for {attribute}')
        plt.xticks(rotation=45)
        plt.show()

# Data Visualization and Handling

In [None]:
# Interactive menu: keeps prompting until the user picks "Exit".
# Option 1 replaces the global `weather_data` with the cleaned frame returned
# by data_exploration; the other options only read it.
while True:
    print("\nMenu ")
    print("1. Data Exploration")
    print("2. Temporal Analysis")
    print("3. Geographical Analysis")
    print("4. Correlation Analysis")
    print("5. Feature-specific  Analysis")
    print("6. Additional Task :  Box plot for All Attributes")
    print("7. Exit")

    try:
        choice = int(input("Enter your choice(1-7): "))
    except ValueError:
        # Non-numeric input is treated like any other invalid choice instead
        # of terminating the loop with a traceback.
        print("Invalid choice. Please enter a valid option: ")
        continue

    if choice == 1:
        weather_data = data_exploration(weather_data)
    elif choice == 2:
        temporal_analysis(weather_data)
    elif choice == 3:
        geographical_analysis(weather_data)
    elif choice == 4:
        correlational_analysis(weather_data)
    elif choice == 5:
        feature_specific_analysis(weather_data)
    elif choice == 6:
        box_plot_all_attributes(weather_data)
    elif choice == 7:
        print("Exiting the program. Thank you!")
        break
    else:
        print("Invalid choice. Please enter a valid option: ")
        
