<a href="https://colab.research.google.com/github/HarithaNagella/weather-forecasting/blob/main/weather.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Step 1: Load the Data
# The path is absolute; adjust it when running somewhere that
# /content/weather.csv does not exist.
df = pd.read_csv('/content/weather.csv')

# Step 2: Data Exploration
print("Data Head:\n", df.head())
# DataFrame.info() writes its report directly to stdout and returns None, so
# wrapping it in print() would emit the report *before* the header and then
# print a stray "None". Print the header first, then call info() on its own.
print("\nData Info:")
df.info()
print("\nData Description:\n", df.describe())

# Step 3: Data Cleaning and Preparation
# Parse the 'Date' column into datetimes. Values that cannot be parsed become
# NaT (errors='coerce') and the rows holding them are then discarded.
if 'Date' not in df.columns:
    print("Date column not found in the dataset.")
else:
    try:
        parsed_dates = pd.to_datetime(df['Date'], errors='coerce')
        df['Date'] = parsed_dates
        # Keep only the rows whose date parsed successfully.
        df = df.dropna(subset=['Date'])
    except Exception as exc:
        print(f"Error converting 'Date' column: {exc}")

# Step 4: Feature Engineering
# Derive a 'Month' column from the 'Date' column via the .dt accessor; the
# later per-month aggregations group on it.
if 'Date' not in df.columns:
    print("Date column not found in the dataset.")
else:
    date_values = df['Date']
    df['Month'] = date_values.dt.month

# Step 5: Data Analysis
# Average MaxTemp for each month. The result is reset to None first so that a
# failed computation can never leave the name unbound, nor let a value from an
# earlier notebook run be plotted below by mistake.
monthly_avg_max_temp = None
try:
    monthly_avg_max_temp = df.groupby('Month')['MaxTemp'].mean()
except Exception as e:
    # Best-effort notebook step: report the problem and carry on.
    print(f"Error calculating monthly average max temperature: {e}")

# Step 6: Data Visualization
# Plot the monthly averages only when they were actually computed, so no
# empty figure is created and no misleading "plotting" error is reported.
if 'Month' not in df.columns:
    print("Month column not found in the dataset.")
elif monthly_avg_max_temp is None:
    print("Monthly average max temperature is unavailable; skipping plot.")
else:
    try:
        plt.figure(figsize=(10, 5))
        plt.plot(monthly_avg_max_temp.index, monthly_avg_max_temp.values, marker='o')
        plt.xlabel('Month')
        plt.ylabel('Average Max Temperature')
        plt.title('Monthly Average Max Temperature')
        plt.grid(True)
        plt.show()
    except Exception as e:
        print(f"Error plotting monthly average max temperature: {e}")

# Step 7: Advanced Analysis (e.g., predict Rainfall)
# Fit a linear regression of Rainfall on MinTemp and MaxTemp and report the
# mean squared error on a held-out 20% split.
model_columns = ['MinTemp', 'MaxTemp', 'Rainfall']
if all(column in df.columns for column in model_columns):
    # LinearRegression rejects NaN in the features or the target, so train
    # only on rows where all three values are present.
    model_df = df[model_columns].dropna()

    if len(model_df) < 2:
        # train_test_split needs at least one row on each side of the split.
        print("Not enough complete rows to train the rainfall model.")
    else:
        X = model_df[['MinTemp', 'MaxTemp']]
        y = model_df['Rainfall']

        # Split the data into training and testing sets (fixed seed for
        # reproducible results).
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Create and train a linear regression model
        model = LinearRegression()
        model.fit(X_train, y_train)

        # Make predictions and calculate the Mean Squared Error
        y_pred = model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        print(f'Mean Squared Error for Rainfall Prediction: {mse}')
else:
    print("Columns (MinTemp, MaxTemp, Rainfall) not found in the dataset.")

# Step 8: Conclusions and Insights
# The monthly rainfall summary needs only 'Month' and 'Rainfall', so it runs
# independently of whether the regression above could be fitted.
if 'Month' in df.columns and 'Rainfall' in df.columns:
    try:
        monthly_avg_rainfall = df.groupby('Month')['Rainfall'].mean()
        highest_rainfall_month = monthly_avg_rainfall.idxmax()
        lowest_rainfall_month = monthly_avg_rainfall.idxmin()
        print(f'Highest rainfall month: {highest_rainfall_month}, Lowest rainfall month: {lowest_rainfall_month}')

        # Visualize monthly average rainfall
        plt.figure(figsize=(10, 5))
        plt.plot(monthly_avg_rainfall.index, monthly_avg_rainfall.values, marker='o', color='b')
        plt.xlabel('Month')
        plt.ylabel('Average Rainfall')
        plt.title('Monthly Average Rainfall')
        plt.grid(True)
        plt.show()
    except Exception as e:
        # Best-effort notebook step: report the problem and carry on.
        print(f"Error analyzing rainfall data: {e}")
else:
    print("Month or Rainfall column not found in the dataset.")

# Step 9: Communication (Optional)
# Save or display the results and potentially export to a report or presentation

# Step 10: Future Work (Optional)
# Consider including additional features for analysis, using more advanced models, or extending the dataset for better predictions.


Data Head:
    MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine WindGustDir  \
0      8.0     24.3       0.0          3.4       6.3          NW   
1     14.0     26.9       3.6          4.4       9.7         ENE   
2     13.7     23.4       3.6          5.8       3.3          NW   
3     13.3     15.5      39.8          7.2       9.1          NW   
4      7.6     16.1       2.8          5.6      10.6         SSE   

   WindGustSpeed WindDir9am WindDir3pm  WindSpeed9am  ...  Humidity3pm  \
0           30.0         SW         NW           6.0  ...           29   
1           39.0          E          W           4.0  ...           36   
2           85.0          N        NNE           6.0  ...           69   
3           54.0        WNW          W          30.0  ...           56   
4           50.0        SSE        ESE          20.0  ...           49   

   Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  Temp3pm  RainToday  \
0       1019.7       1015.0         7         7     14