In [None]:
pip install pandas scikit-learn openpyxl



In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
import pickle
import os

In [None]:
excel_file_path = '/content/data_cord.xlsx'
location_mapping = pd.read_excel(excel_file_path, header=None)

location_mapping.columns = ['Key', 'Longitude', 'Latitude']
print(location_mapping.head())

location_mapping.to_csv('/content/labeled_data_cord.csv', index=False)

   Key  Longitude  Latitude
0    0   -179.750     -90.0
1    1   -160.623     -90.0
2    2   -141.496     -90.0
3    3   -122.369     -90.0
4    4   -103.242     -90.0


In [None]:
csv_directory = '/content'

def load_weather_data(param):
    weather_data = []
    for year in range(2019, 2024):
        file_name = f"{param}_{year}.csv"
        file_path = os.path.join(csv_directory, file_name)
        if os.path.exists(file_path):
            df = pd.read_csv(file_path)
            weather_data.append(df)
        else:
            print(f"File not found: {file_name}")
    return pd.concat(weather_data, ignore_index=True)

temperature_data = load_weather_data('Temperature')
wind_speed_data = load_weather_data('Wind')
rain_probability_data = load_weather_data('Rain')
moisture_data = load_weather_data('Moisture')

print(temperature_data.head())

   9979980        0        1        2        3        4        5        6  \
0      0.0 -10.9407 -10.9176 -10.7401 -10.6351 -10.6105 -10.6537 -10.7529   
1      1.0 -11.4615 -11.4658 -11.2996 -11.1863 -11.1189 -11.0640 -11.0277   
2      2.0 -11.6993 -11.7204 -11.5398 -11.4029 -11.3031 -11.2401 -11.2297   
3      3.0 -11.8687 -11.8832 -11.7073 -11.5691 -11.4583 -11.3613 -11.3092   
4      4.0 -11.9462 -12.0216 -11.8642 -11.7481 -11.6750 -11.6107 -11.5904   

         7        8  ...    12968    12969    12970    12971    12972  12973  \
0 -10.8037 -10.8785  ... -18.0806 -18.0993 -18.0993 -18.1180 -18.7721      0   
1 -10.9726 -10.9973  ... -17.7401 -17.7584 -17.7584 -17.7768 -18.4185      0   
2 -11.2281 -11.2914  ... -18.4289 -18.4480 -18.4480 -18.4670 -19.1337      0   
3 -11.2606 -11.2767  ... -18.0342 -18.0528 -18.0528 -18.0715 -18.7239      0   
4 -11.5531 -11.5631  ... -18.6457 -18.6649 -18.6649 -18.6842 -19.3587      0   

   10507020  11032620  11558220  12083820  
0       NaN 

In [None]:
print("Columns in temperature_data:", temperature_data.columns)
print("Columns in location_mapping:", location_mapping.columns)

Columns in temperature_data: Index(['9979980', '0', '1', '2', '3', '4', '5', '6', '7', '8',
       ...
       '12968', '12969', '12970', '12971', '12972', '12973', '10507020',
       '11032620', '11558220', '12083820'],
      dtype='object', length=12979)
Columns in location_mapping: Index(['Key', 'Longitude', 'Latitude'], dtype='object')


In [None]:
location_mapping.columns = ['Key', 'Longitude', 'Latitude']


temperature_data.rename(columns={'9979980': 'Key'}, inplace=True)
wind_speed_data.rename(columns={'9979980': 'Key'}, inplace=True)
rain_probability_data.rename(columns={'9979980': 'Key'}, inplace=True)
moisture_data.rename(columns={'9979980': 'Key'}, inplace=True)


def merge_with_mapping(weather_data, mapping):
    return weather_data.merge(mapping, on='Key')


temperature_data = merge_with_mapping(temperature_data, location_mapping)
wind_speed_data = merge_with_mapping(wind_speed_data, location_mapping)
rain_probability_data = merge_with_mapping(rain_probability_data, location_mapping)
moisture_data = merge_with_mapping(moisture_data, location_mapping)


print(temperature_data.head())

   Key        0        1        2        3        4        5        6  \
0  0.0 -10.9407 -10.9176 -10.7401 -10.6351 -10.6105 -10.6537 -10.7529   
1  1.0 -11.4615 -11.4658 -11.2996 -11.1863 -11.1189 -11.0640 -11.0277   
2  2.0 -11.6993 -11.7204 -11.5398 -11.4029 -11.3031 -11.2401 -11.2297   
3  3.0 -11.8687 -11.8832 -11.7073 -11.5691 -11.4583 -11.3613 -11.3092   
4  4.0 -11.9462 -12.0216 -11.8642 -11.7481 -11.6750 -11.6107 -11.5904   

         7        8  ...    12970    12971    12972  12973  10507020  \
0 -10.8037 -10.8785  ... -18.0993 -18.1180 -18.7721      0       NaN   
1 -10.9726 -10.9973  ... -17.7584 -17.7768 -18.4185      0       NaN   
2 -11.2281 -11.2914  ... -18.4480 -18.4670 -19.1337      0       NaN   
3 -11.2606 -11.2767  ... -18.0528 -18.0715 -18.7239      0       NaN   
4 -11.5531 -11.5631  ... -18.6649 -18.6842 -19.3587      0       NaN   

   11032620  11558220  12083820  Longitude  Latitude  
0       NaN       NaN       NaN   -179.750     -90.0  
1       NaN       

In [None]:
scaler_coords = MinMaxScaler()
scaler_weather = MinMaxScaler()
def normalize_data(df):

    df[['Longitude', 'Latitude']] = scaler_coords.fit_transform(df[['Longitude', 'Latitude']])
    weather_columns = [col for col in df.columns if col not in ['Key', 'Longitude', 'Latitude'] and not df[col].isnull().all()]
    df[weather_columns] = scaler_weather.fit_transform(df[weather_columns])

    return df

temperature_data = normalize_data(temperature_data)
wind_speed_data = normalize_data(wind_speed_data)
rain_probability_data = normalize_data(rain_probability_data)
moisture_data = normalize_data(moisture_data)


print(temperature_data.head())

   Key         0         1         2         3         4         5         6  \
0  0.0  0.913409  0.911338  0.907702  0.904849  0.900676  0.892140  0.882151   
1  1.0  0.889405  0.886420  0.882275  0.879800  0.877581  0.873473  0.869724   
2  2.0  0.878445  0.874847  0.871359  0.869957  0.869214  0.865461  0.860590   
3  3.0  0.870637  0.867447  0.863747  0.862404  0.862164  0.859947  0.856995   
4  4.0  0.867065  0.861156  0.856617  0.854270  0.852320  0.848600  0.844279   

          7         8  ...     12970     12971     12972  12973  10507020  \
0  0.879423  0.883933  ...  0.283128  0.283129  0.283128    0.0       NaN   
1  0.871748  0.878520  ...  0.295775  0.295773  0.295776    0.0       NaN   
2  0.860137  0.865119  ...  0.270192  0.270195  0.270194    0.0       NaN   
3  0.858660  0.865788  ...  0.284853  0.284852  0.284852    0.0       NaN   
4  0.845369  0.852738  ...  0.262145  0.262145  0.262146    0.0       NaN   

   11032620  11558220  12083820  Longitude  Latitude  
0

In [None]:

def combine_csv_files(param, years, csv_directory):
    combined_data = []
    for year in years:
        file_path = os.path.join(csv_directory, f"{param}_{year}.csv")
        if os.path.exists(file_path):
            df = pd.read_csv(file_path)
            combined_data.append(df)
        else:
            print(f"File not found: {file_path}")
    return pd.concat(combined_data, ignore_index=True)

csv_directory = '/content'  
years = range(2019, 2024)

temperature_data = combine_csv_files('Temperature', years, csv_directory)
wind_speed_data = combine_csv_files('Wind', years, csv_directory)
rain_probability_data = combine_csv_files('Rain', years, csv_directory)
moisture_data = combine_csv_files('Moisture', years, csv_directory)

In [1]:
def aggregate_data(data, window=14):
    data['Date'] = pd.to_datetime(data['Day'], unit='D', origin=pd.Timestamp('2019-01-01'))
    data['Period'] = (data['Day'] // window).astype(int)
    aggregated = data.groupby(['Longitude', 'Latitude', 'Period']).mean().reset_index()
    return aggregated

temperature_data = aggregate_data(temperature_data)
wind_speed_data = aggregate_data(wind_speed_data)
rain_probability_data = aggregate_data(rain_probability_data)
moisture_data = aggregate_data(moisture_data)

In [None]:
def train_kmeans_with_day(df, n_clusters, output_param):

    scaler_day = MinMaxScaler()  
    days = np.linspace(1, len(df), len(df)).reshape(-1, 1) 
    df['Normalized_Day'] = scaler_day.fit_transform(days)
    
    features = df[['Longitude', 'Latitude']].copy()
    features['Day'] = df['Normalized_Day']
   
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(features)
    print(f"Model for {output_param} trained with {n_clusters} clusters.")
    return kmeans, scaler_day

temperature_kmeans, temperature_day_scaler = train_kmeans_with_day(temperature_data, 5, 'Temperature')
wind_speed_kmeans, wind_day_scaler = train_kmeans_with_day(wind_speed_data, 5, 'Wind Speed')
rain_probability_kmeans, rain_day_scaler = train_kmeans_with_day(rain_probability_data, 5, 'Rain Probability')
moisture_kmeans, moisture_day_scaler = train_kmeans_with_day(moisture_data, 5, 'Moisture')

  df['Normalized_Day'] = scaler_day.fit_transform(days)
  df['Normalized_Day'] = scaler_day.fit_transform(days)
  df['Normalized_Day'] = scaler_day.fit_transform(days)


Model for Temperature trained with 5 clusters.
Model for Wind Speed trained with 5 clusters.
Model for Rain Probability trained with 5 clusters.
Model for Moisture trained with 5 clusters.


  df['Normalized_Day'] = scaler_day.fit_transform(days)


In [None]:
output_directory = '/content'

def save_model_and_scaler(model, scaler, model_filename, scaler_filename):
    with open(os.path.join(output_directory, model_filename), 'wb') as file:
        pickle.dump(model, file)
    with open(os.path.join(output_directory, scaler_filename), 'wb') as file:
        pickle.dump(scaler, file)
    print(f"Model saved as {model_filename}, Scaler saved as {scaler_filename}")

save_model_and_scaler(temperature_kmeans, temperature_day_scaler, 'temperature.pkl', 'temperature_day_scaler.pkl')
save_model_and_scaler(wind_speed_kmeans, wind_day_scaler, 'wind_speed.pkl', 'wind_speed_day_scaler.pkl')
save_model_and_scaler(rain_probability_kmeans, rain_day_scaler, 'rain_probability.pkl', 'rain_probability_day_scaler.pkl')
save_model_and_scaler(moisture_kmeans, moisture_day_scaler, 'moisture.pkl', 'moisture_day_scaler.pkl')

Model saved as temperature.pkl, Scaler saved as temperature_day_scaler.pkl
Model saved as wind_speed.pkl, Scaler saved as wind_speed_day_scaler.pkl
Model saved as rain_probability.pkl, Scaler saved as rain_probability_day_scaler.pkl
Model saved as moisture.pkl, Scaler saved as moisture_day_scaler.pkl


In [None]:

def analyze_clusters(data, kmeans_model, features):
    data['Cluster'] = kmeans_model.predict(data[features])
    cluster_summary = data.groupby('Cluster').mean()
    return cluster_summary

temperature_clusters = analyze_clusters(temperature_data, temperature_kmeans, features)
wind_speed_clusters = analyze_clusters(wind_speed_data, wind_speed_kmeans, features)
rain_probability_clusters = analyze_clusters(rain_probability_data, rain_probability_kmeans, features)
moisture_clusters = analyze_clusters(moisture_data, moisture_kmeans, features)

print("Temperature Cluster Analysis:")
print(temperature_clusters)

In [None]:
def predict_weather(longitude, latitude, period, weather_values, kmeans_model, cluster_summary):

    normalized_coords = scaler_coords.transform([[longitude, latitude]])
    normalized_period = scaler_day.transform([[period]])
    normalized_weather = scaler_weather.transform([weather_values])

    input_features = np.hstack((normalized_coords, normalized_period, normalized_weather))

    cluster = kmeans_model.predict(input_features)[0]

    cluster_info = cluster_summary.loc[cluster]

    return {
        'Cluster': cluster,
        'Cluster Characteristics': cluster_info.to_dict(),
    }
result = predict_weather(-52.2391, 113.889, 100, [0.5, 0.8, 1.2], temperature_kmeans, temperature_clusters)
print(result)

In [None]:
def predict_val(longitude, latitude, day):
    normalized_coords = scaler_coords.transform([[longitude, latitude]])
    normalized_day = temperature_day_scaler.transform([[day]])[0][0]  )
    input_features = np.hstack((normalized_coords, [[normalized_day]]))

    with open(os.path.join(output_directory, 'temperature.pkl'), 'rb') as file:
        temp_model = pickle.load(file)
    with open(os.path.join(output_directory, 'wind_speed.pkl'), 'rb') as file:
        wind_model = pickle.load(file)
    with open(os.path.join(output_directory, 'rain_probability.pkl'), 'rb') as file:
        rain_model = pickle.load(file)
    with open(os.path.join(output_directory, 'moisture.pkl'), 'rb') as file:
        moisture_model = pickle.load(file)

    temp_cluster = temp_model.predict(input_features)[0]
    wind_cluster = wind_model.predict(input_features)[0]
    rain_cluster = rain_model.predict(input_features)[0]
    moisture_cluster = moisture_model.predict(input_features)[0]

    return {
        'Temperature Cluster': temp_cluster,
        'Wind Speed Cluster': wind_cluster,
        'Rain Probability Cluster': rain_cluster,
        'Moisture Cluster': moisture_cluster
    }


print(predict_val(-52.2391,113.889, 120))  

{'Temperature Cluster': 1, 'Wind Speed Cluster': 1, 'Rain Probability Cluster': 1, 'Moisture Cluster': 1}


