In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import json
import requests
import time

In [None]:
# Load the equipment table and discard every row that has a missing value.
equipment = pd.read_csv('../data/equipment.csv').dropna()

In [None]:
# Display the first rows of the equipment table as a quick visual check.
equipment.head()

In [None]:
# Number of location clusters; one weather request is later issued per centroid.
n_clusters = 600

# A fixed seed keeps the centroids (and their order) identical across runs.
# Without it, K-Means starts from a different random initialisation every time,
# so any per-centroid data cached from an earlier run would no longer line up
# with the current centroids.
kmeans = KMeans(n_clusters=n_clusters, random_state=42)

# Fit on the coordinates and label every equipment row with its cluster index.
# Column order is (lat, lon), so cluster_centers_[:, 0] is latitude and
# cluster_centers_[:, 1] is longitude.
equipment['cluster'] = kmeans.fit_predict(equipment[['lat', 'lon']])

fig, ax = plt.subplots()

# Step 1: Create the Scatter Plot (coloured by cluster index)
scatter = ax.scatter(equipment["lon"], equipment["lat"], c=equipment["cluster"], cmap='viridis')

# Step 2: Adjust the Map Settings
ax.set_aspect('equal', adjustable='box')  # Ensure the aspect ratio is equal
# Pad the data extent by one degree on every side.
ax.set_xlim([equipment["lon"].min() - 1, equipment["lon"].max() + 1])  # Set longitude limits
ax.set_ylim([equipment["lat"].min() - 1, equipment["lat"].max() + 1])  # Set latitude limits

# Add labels and title
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Cluster Scatter Plot')

plt.show()

In [None]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# API key used for the weather requests. Fail here, with a clear message,
# instead of letting every request fail later with an authentication error.
api_key = os.getenv('API_KEY')
if not api_key:
    raise RuntimeError(
        "API_KEY is not set; define it in the environment or in the .env file"
    )

# Maximum number of API calls allowed per minute (used to throttle requests).
_api_call_limit_raw = os.getenv('API_CALL_LIMIT')
if _api_call_limit_raw is None:
    raise RuntimeError(
        "API_CALL_LIMIT is not set; define it in the environment or in the .env file"
    )
api_calls_per_minute = int(_api_call_limit_raw)
if api_calls_per_minute <= 0:
    # A zero or negative limit would break the batch arithmetic downstream.
    raise ValueError(
        f"API_CALL_LIMIT must be a positive integer, got {api_calls_per_minute}"
    )

In [None]:
# Function to fetch the current weather for a given location
def get_weather_data(lat, lon):
    """Fetch current weather for one coordinate from the OpenWeatherMap API.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the location to query.

    Returns
    -------
    dict
        The decoded JSON body of the response (metric units requested).

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status (e.g. invalid key, rate limit),
        so that error payloads are never mistaken for weather data.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    response = requests.get(
        # HTTPS so the API key is not sent in clear text.
        'https://api.openweathermap.org/data/2.5/weather',
        # Let requests build and escape the query string.
        params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'},
        # Never hang the notebook indefinitely on a stalled connection.
        timeout=30,
    )
    response.raise_for_status()
    return response.json()

# Fetch data for each centroid, in centroid order, without exceeding the
# per-minute API call limit.
if api_calls_per_minute <= 0:
    raise ValueError("api_calls_per_minute must be a positive integer")

# Rows are (lat, lon) because the model was fitted on the ['lat', 'lon'] columns.
centroids = kmeans.cluster_centers_

weather_data = []

# Walk the centroids in batches of at most `api_calls_per_minute` requests.
for batch_start in range(0, len(centroids), api_calls_per_minute):
    if batch_start > 0:
        # Wait a full minute before EVERY batch after the first, including a
        # final partial batch, so no 60-second window exceeds the limit.
        time.sleep(60)
    for lat, lon in centroids[batch_start:batch_start + api_calls_per_minute]:
        weather_data.append(get_weather_data(lat, lon))

In [None]:
# Persist the raw API responses so later cells can reload them from disk.
with open('../data/weather_data.json', 'w') as json_out:
    json.dump(weather_data, json_out)

In [None]:
# Reload the stored API responses from disk.
with open('../data/weather_data.json', 'r') as json_in:
    weather_data = json.loads(json_in.read())

In [None]:
# Score for each sky-condition description string, from 0.1 ('clear sky')
# up to 1.0 ('very heavy rain'). Descriptions sharing a score are grouped.
sky_score_weights = {
    description: weight
    for weight, descriptions in (
        (0.1, ('clear sky',)),
        (0.2, ('few clouds',)),
        (0.3, ('scattered clouds',)),
        (0.4, ('broken clouds',)),
        (0.5, ('overcast clouds',)),
        (0.6, ('mist', 'light rain', 'light intensity drizzle', 'light intensity drizzle rain')),
        (0.7, ('moderate rain',)),
        (0.8, ('light intensity shower rain', 'shower rain')),
        (0.9, ('heavy intensity rain',)),
        (1.0, ('very heavy rain',)),
    )
    for description in descriptions
}

# Multipliers applied to the individual weather measurements.
weather_score_weights = dict(
    temperature=0.5,
    humidity=0.3,
    wind_speed=0.2,
    rain=0.5,
)

In [None]:
# One row per K-Means centroid; columns follow the (lat, lon) order the model
# was fitted with.
stations = pd.DataFrame(kmeans.cluster_centers_, columns=['lat', 'lon'])

# Responses are matched to centroids purely by position, so the counts must agree.
if len(weather_data) != len(stations):
    raise ValueError(
        f"weather_data has {len(weather_data)} entries but there are "
        f"{len(stations)} cluster centroids; re-fetch the weather data"
    )

# Calculate weather score for each station
# NOTE(review): the terms are summed on their raw scales (humidity is presumably
# a 0-100 percentage while the sky weight is at most 1.0), so humidity likely
# dominates the total -- confirm this is intended.
weather_scores = []
for station in weather_data:
    description = station['weather'][0]['description']
    try:
        sky_score = sky_score_weights[description]
    except KeyError:
        # Same exception type as a plain lookup, but says what to fix.
        raise KeyError(
            f"No sky score weight defined for weather description {description!r}; "
            "add it to sky_score_weights"
        ) from None

    temp_score = station['main']['temp'] * weather_score_weights['temperature']
    humidity_score = station['main']['humidity'] * weather_score_weights['humidity']
    wind_score = station['wind']['speed'] * weather_score_weights['wind_speed']

    # The 'rain' object is absent when it is not raining, and when present it
    # may lack the '1h' key; treat both cases as no rain.
    rain_amount = station.get('rain', {}).get('1h', 0)
    rain_score = rain_amount * weather_score_weights['rain']

    weather_score = sky_score + temp_score + humidity_score + wind_score + rain_score
    weather_scores.append(weather_score)

stations['weather_score'] = weather_scores

stations.head()


In [None]:
# Weather score with PCA
#
# Projects the five raw weather measurements of every station onto their first
# principal component and stores that projection as an alternative score.
# NOTE(review): the features are fed to PCA on their raw scales, so the
# component is likely dominated by whichever feature has the largest variance;
# consider standardising first. The sign of a principal component is arbitrary.

pca = PCA(n_components=1)
# Feature names, in the same order as the columns of the PCA input rows.
features = ['sky', 'temp', 'humidity', 'wind', 'rain']

pca_input_rows = []

for station in weather_data:
    sky = sky_score_weights[station['weather'][0]['description']]
    temp = station['main']['temp']
    humidity = station['main']['humidity']
    wind = station['wind']['speed']
    # The 'rain' object is absent when it is not raining, and when present it
    # may lack the '1h' key; treat both cases as no rain.
    rain = station.get('rain', {}).get('1h', 0)
    pca_input_rows.append([sky, temp, humidity, wind, rain])

# Shape (n_stations, 1): one projected value per station.
weather_data_pca = pca.fit_transform(pca_input_rows)

# Store the single component as a plain 1-D column.
stations['weather_score_pca'] = weather_data_pca[:, 0]


In [None]:
# Write the per-station scores to CSV. Passing the path lets pandas open the
# file itself with correct newline handling; a text handle opened without
# newline='' can yield doubled line endings (blank rows) on Windows.
stations.to_csv('../data/weather_scores.csv', index=False)

In [None]:
# Coefficients of the first (and only) principal component, one per feature.
loadings = pca.components_[0]

# Pair each feature name with its coefficient and report it.
for feature, loading in zip(features, loadings):
    print(f"Feature {feature} contributes {loading} to the Principal component")