#Weather API + Weather Observations Machine Learning exercise 

In [2]:
# Install modules to interact with weather API
import requests
import os
import pandas as pd
from dotenv import load_dotenv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
import datetime as dt

In [3]:
try:
    # Load API key from .env file
    load_dotenv()
    API_KEY = os.getenv('API_KEY')

    # Collect weather data for the last 6 months
    weather_data_days = []
    for i in range(180):
        # Initialize date and format for the API
        date = dt.datetime.now() - dt.timedelta(days=i)
        formatted_date = date.strftime("%Y-%m-%d")
        
        # API request
        response = requests.get(f'http://api.weatherapi.com/v1/history.json?key={API_KEY}&q=02453&dt={formatted_date}')
        if response.status_code == 200:
            response_data = response.json()
            forecast = response_data["forecast"]["forecastday"][0]
            # print(forecast)
            weather_data_days.append({
                # "Date": forecast["date"],
                "date_epoch": forecast["date_epoch"],
                "Temperature": forecast["day"]["avgtemp_f"],
                "Precipitation": forecast["day"]["totalprecip_in"],
                "Cloud cover": forecast["day"]["condition"]["text"]
            })
        else:
            print(f"Failed to fetch weather data for {formatted_date}. Status code: {response.status_code}")
    # Convert collected weather data to a DataFrame
    if weather_data_days:
        weather_data_df = pd.DataFrame(weather_data_days)
    else:
        raise ValueError("No weather data collected. Please check API responses.")

    # Load historical weather data if available
    try:
        historical_weather_data = pd.read_csv('historical_weather_data.csv')
        all_weather_data = pd.concat([weather_data_df, historical_weather_data], ignore_index=True)
    except FileNotFoundError:
        print("No historical weather data found. Using only collected data.")
        all_weather_data = weather_data_df

    # Encode "Cloud cover" to numeric values
    label_encoder = LabelEncoder()
    all_weather_data["Cloud cover"] = label_encoder.fit_transform(all_weather_data["Cloud cover"])

    # Features and target
    X = all_weather_data[["Temperature","date_epoch","Precipitation", "Cloud cover"]]
    y = all_weather_data["Temperature"].shift(-1)  # Predict next day's temperature

    # Drop the last row of X and y to ensure they match in size
    X = X.iloc[:-1]
    y = y.iloc[:-1]

    # Handle NaNs in X
    if X.isna().sum().sum() > 0:
        print("Handling NaNs in X...")
        X = X.fillna("")  # Replace NaNs with the column mean

    # Handle NaNs in y (if applicable)
    if y.isna().sum() > 0:
        print("Handling NaNs in y...")
        y = y.fillna("")  # Replace NaNs with the mean value of y

    # Normalize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


    # Train k-NN model
    knn = KNeighborsRegressor(n_neighbors=5)
    knn.fit(X_train, y_train)

    # Evaluate the model
    accuracy = knn.score(X_test, y_test)
    print(f"Model Accuracy: {accuracy:.2f}")

    # Predict tomorrow's temperature
    tomorrow_features = X_scaled[-1].reshape(1, -1)  # Use the last available row
    tomorrow_prediction = knn.predict(tomorrow_features)
    print(f"Predicted Temperature for Tomorrow: {tomorrow_prediction[0]:.2f}°F")

except Exception as e:
    print(f"An error occurred: {e}")

   

Model Accuracy: 0.88
Predicted Temperature for Tomorrow: 25.08°F


Date,date_epoch,Temperature,Cloud cover,Precipitation
2025-01-16,1737028800,5,Clear,0
2025-01-16,1737054000,28,Partly Cloudy,0
2025-01-16,1737079200,17,Cloudy,0
2025-01-17,1737115200,7,Clear,0
2025-01-17,1737140400,30,Partly Cloudy,0
2025-01-17,1737165600,19,Cloudy,0
2025-01-18,1737201600,10,Clear,0
2025-01-18,1737226800,32,Partly Cloudy,0
2025-01-18,1737252000,22,Cloudy,0