In [None]:
# Latitude/longitude columns only
region_columns = ['Area Latitude', 'Area Longitude']
df_region = df[region_columns]
df_region

In [None]:
# Remove repeated coordinate pairs: a row is dropped only when every column
# matches an earlier row (the first occurrence is kept).
df_region_drop_dup = df_region.drop_duplicates(keep='first')
df_region_drop_dup


In [None]:
# Fetch the weather forecast for every unique coordinate (saved as the June dataset in a later cell)
import warnings
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

# Register tqdm's pandas integration (adds `progress_apply` to pandas objects).
# NOTE(review): no visible cell calls `progress_apply` -- confirm this is still needed.
tqdm.pandas()

def _pick(entry, path, default):
    """Walk `path` (a sequence of keys/indices) into `entry`; return `default` if any step is missing."""
    value = entry
    try:
        for step in path:
            value = value[step]
    except (KeyError, IndexError, TypeError):
        # Only "field absent / wrong shape" errors are expected from parsed JSON.
        return default
    return value


def get_weather(lat, lon):
    """Fetch the forecast for one coordinate and return it as a long DataFrame.

    Args:
        lat: Latitude, sent as the ``lat`` query parameter.
        lon: Longitude, sent as the ``lon`` query parameter.

    Returns:
        pandas.DataFrame with columns ``datetime``, ``temp``, ``weather``,
        ``lat`` and ``lon`` -- one row per entry of the response's ``list``.
        A field missing from an entry is filled with the sentinel string
        ``"no date"``, ``"no temp"`` or ``"no weather"``.

        If the request fails or the status code is not 200, a
        ``RuntimeWarning`` is emitted and a single all-sentinel row is
        returned for the coordinate. (Previously the non-200 path built
        columns of unequal length, so ``pd.DataFrame`` raised ``ValueError``
        and aborted the whole batch.)

    Raises:
        KeyError: if a 200 response has no ``list`` key.
    """
    url = "https://weather.lewagon.com/data/2.5/forecast"
    columns = ["datetime", "temp", "weather", "lat", "lon"]
    # Placeholder row for a coordinate whose forecast could not be retrieved;
    # uses the same sentinels as missing fields so downstream cleaning sees one convention.
    failed_row = {
        "datetime": "no date",
        "temp": "no temp",
        "weather": "no weather",
        "lat": lat,
        "lon": lon,
    }

    try:
        # The timeout keeps one stalled connection from blocking a worker forever.
        res = requests.get(url, params={"lat": lat, "lon": lon}, timeout=30)
    except requests.RequestException as exc:
        warnings.warn(
            f"Weather request for ({lat}, {lon}) failed: {exc}",
            RuntimeWarning,
            stacklevel=2,
        )
        return pd.DataFrame([failed_row], columns=columns)

    if res.status_code != 200:
        warnings.warn(
            f"Weather request for ({lat}, {lon}) returned HTTP {res.status_code}",
            RuntimeWarning,
            stacklevel=2,
        )
        return pd.DataFrame([failed_row], columns=columns)

    rows = [
        {
            "datetime": _pick(entry, ("dt_txt",), "no date"),
            "temp": _pick(entry, ("main", "temp"), "no temp"),
            "weather": _pick(entry, ("weather", 0, "main"), "no weather"),
            "lat": lat,
            "lon": lon,
        }
        for entry in res.json()["list"]
    ]
    return pd.DataFrame(rows, columns=columns)

# Coordinate list: one (latitude, longitude) tuple per unique area
# (alternatively build it from df['latitude'], df['longitude'])
list_of_coords = list(
    zip(
        df_region_drop_dup['Area Latitude'],
        df_region_drop_dup['Area Longitude'],
    )
)

# Collect the data in parallel; tqdm shows progress as results are consumed
with ThreadPoolExecutor(max_workers=10) as executor:
    forecast_frames = executor.map(lambda coord: get_weather(coord[0], coord[1]), list_of_coords)
    results = list(tqdm(forecast_frames, total=len(list_of_coords)))

# Combine the per-coordinate results into a single DataFrame
weather_df = pd.concat(results, ignore_index=True)
weather_df

In [None]:
# Persist the fetched forecast, then reload it so later cells work from the saved file.
weather_df.to_csv('weather_dataJune.csv', index=False)
# `round_trip` float parsing guarantees lat/lon are read back exactly as written;
# they are used as exact-match merge keys further down, where a last-bit
# difference would silently drop the match.
weather_df = pd.read_csv('weather_dataJune.csv', float_precision='round_trip')
weather_df

In [None]:
# Cleaning: inspect which weather labels occur and how often.
# Only the last expression of a cell is rendered, so the former `unique()` call
# above this line never showed anything; `value_counts` already lists every
# distinct non-null label in its index.
weather_df['weather'].value_counts()

In [None]:
# Build the working frame from the reloaded forecast table.
# NOTE(review): this cell used to copy `weather_df_filtered` from itself, but no
# visible cell defines that name (NameError on a fresh kernel). It is derived
# from `weather_df` here -- confirm whether additional row filtering was intended.
#
# After the CSV round trip `datetime` is plain text, and rows built from the
# fetch step's sentinel strings ("no date", "no temp") are not parseable:
#   * coerce `datetime` to real timestamps and drop rows without one,
#   * coerce `temp` to numbers so the mean below is well-defined.
# `assign` returns new frames, so `weather_df` itself is left untouched.
weather_df_filtered = (
    weather_df
    .assign(
        datetime=lambda frame: pd.to_datetime(frame['datetime'], errors='coerce'),
        temp=lambda frame: pd.to_numeric(frame['temp'], errors='coerce'),
    )
    .dropna(subset=['datetime'])
    # split the timestamp into calendar date and hour of day
    .assign(
        date=lambda frame: frame['datetime'].dt.date,
        hour=lambda frame: frame['datetime'].dt.hour,
    )
)

# temp pivot: one row per (lat, lon, date), one `temp_<hour>` column per forecast hour
temp_pivot = weather_df_filtered.pivot_table(
    index=['lat', 'lon', 'date'],
    columns='hour',
    values='temp',
    aggfunc='mean'
).add_prefix('temp_')

# weather pivot: same layout, keeping the first weather label seen for each hour
weather_pivot = weather_df_filtered.pivot_table(
    index=['lat', 'lon', 'date'],
    columns='hour',
    values='weather',
    aggfunc='first'
).add_prefix('weather_')

# concat the two pivots side by side and turn the index back into columns
weather_df_wide = pd.concat([temp_pivot, weather_pivot], axis=1).reset_index()
weather_df_wide

In [None]:
# Row-wise mean temperature over the hour-6, hour-9 and hour-12 forecast columns
morning_temp_cols = ['temp_6', 'temp_9', 'temp_12']
weather_df_wide['temp_avg'] = weather_df_wide[morning_temp_cols].mean(axis='columns')

In [None]:

# Share of each weather type across the hour-6, hour-9 and hour-12 slots
weather_types = ['Clouds', 'Clear', 'Rain', 'Snow']

def calc_weather_percentages(row):
    """Return, per weather type, the percentage of the three slots showing it.

    Reads ``weather_6``, ``weather_9`` and ``weather_12`` from ``row`` and
    returns a ``pd.Series`` indexed ``clouds_pct``, ``clear_pct``,
    ``rain_pct``, ``snow_pct``. The denominator is always the three slots, so
    labels outside ``weather_types`` (or missing values) lower the total
    below 100%.
    """
    observed = (row['weather_6'], row['weather_9'], row['weather_12'])
    n_slots = len(observed)

    tally = dict.fromkeys(weather_types, 0)
    for label in observed:
        if label in tally:
            tally[label] += 1

    # convert counts to a 0-100 rate
    shares = {}
    for wt in weather_types:
        shares[f"{wt.lower()}_pct"] = (tally[wt] / n_slots) * 100
    return pd.Series(shares)
# Attach the four percentage columns computed row by row
pct_columns = [
    'clouds_pct',
    'clear_pct',
    'rain_pct',
    'snow_pct',
]
weather_df_wide[pct_columns] = weather_df_wide.apply(calc_weather_percentages, axis=1)

In [None]:
def summarize_weather(row):
    """Describe the dominant weather type of a row as ``"<Label> (<pct>%)"``.

    Args:
        row: A pandas Series holding ``clouds_pct``, ``clear_pct``,
            ``rain_pct`` and ``snow_pct`` (other entries are ignored).

    Returns:
        str: e.g. ``"Rain (66.7%)"``. On ties the first column in the order
        above wins, so a row whose four percentages are all 0 yields
        ``"Clouds (0.0%)"``.
    """
    cols = ['clouds_pct', 'clear_pct', 'rain_pct', 'snow_pct']
    # A row produced by `DataFrame.apply(axis=1)` on a mixed-type frame is
    # object dtype; cast to float so `idxmax` also works on pandas releases
    # that do not support object dtype there.
    shares = row[cols].astype(float)
    max_col = shares.idxmax()                  # name of the column with the highest percentage
    max_val = shares[max_col]                  # the value of that column
    label = max_col.replace('_pct', '').capitalize()  # 'clouds_pct' -> 'Clouds'
    return f"{label} ({max_val:.1f}%)"
# One summary string per row of the wide table
weather_df_wide['weather_summary'] = weather_df_wide.apply(summarize_weather, axis='columns')


In [None]:
# Intermediate columns that are no longer needed once `temp_avg` and
# `weather_summary` have been computed
cols_to_drop = [
    'temp_6', 'temp_9', 'temp_12',
    'weather_6', 'weather_9', 'weather_12',
    'clouds_pct', 'clear_pct', 'rain_pct', 'snow_pct'
]

# Reassign instead of dropping in place, and skip columns that are already
# gone, so re-running this cell does not raise KeyError.
weather_df_wide = weather_df_wide.drop(columns=cols_to_drop, errors='ignore')

In [None]:
# 1. Convert Kelvin to Celsius.
# The result goes into its own column: overwriting `temp_avg` with
# `temp_avg - 273.15` subtracted the offset again every time the cell was
# re-run. `temp_avg` therefore stays in Kelvin.
KELVIN_TO_CELSIUS_OFFSET = 273.15
weather_df_wide['temp_avg_celsius'] = weather_df_wide['temp_avg'] - KELVIN_TO_CELSIUS_OFFSET

# 2. Concatenate temperature and weather into one display string, e.g. "18.4°C, Clear (66.7%)"
weather_df_wide['summary'] = (
    weather_df_wide['temp_avg_celsius'].round(1).astype(str)
    + '°C, '
    + weather_df_wide['weather_summary']
)

# 3. Pivot by day: one row per (lat, lon), one column per date
weather_df_summary = weather_df_wide.pivot_table(
    index=['lat', 'lon'],
    columns='date',
    values='summary',
    aggfunc='first'
).reset_index()

# 4. Tidy the column labels: remove 'date' as the name of the columns axis
weather_df_summary.columns.name = None

In [None]:
from datetime import datetime


def _format_date_header(col):
    """Turn a ``YYYY-MM-DD`` column label into e.g. ``June 05 2025``; leave other labels as they are."""
    text = str(col)
    if not text[:4].isdigit():
        # Not a date-like label (e.g. 'lat', 'lon'): keep it untouched.
        return col
    return datetime.strptime(text, "%Y-%m-%d").strftime("%B %d %Y")


weather_df_summary.columns = [
    _format_date_header(col) for col in weather_df_summary.columns
]

weather_df_summary


In [None]:
# Give the coordinate columns the names used as merge keys with the routes table
coordinate_renames = {'lat': 'area_latitude', 'lon': 'area_longitude'}
weather_df_summary.rename(columns=coordinate_renames, inplace=True)
weather_df_summary

In [None]:
# Colab setup: upgrade the BigQuery client library, then authenticate this session.
# NOTE(review): the install is unpinned, so the resolved version can differ between runs.
!pip install --upgrade google-cloud-bigquery
from google.colab import auth
auth.authenticate_user()
from google.cloud import bigquery

# Initialize client
# NOTE(review): `client` is not referenced by any visible cell; the query below goes through pandas_gbq.
client = bigquery.Client(project='rock-finder-project')

import pandas_gbq
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the whole `routes.routes_gold` table into a DataFrame.
sql = "SELECT * FROM rock-finder-project.routes.routes_gold"
p_id="rock-finder-project"
df_routes_gold = pandas_gbq.read_gbq(sql, project_id=p_id)
df_routes_gold

In [None]:
# Left-join the per-day weather summaries onto the routes by area coordinates:
# every route row is kept, with or without a matching weather row.
df_with_weather = pd.merge(
    df_routes_gold,
    weather_df_summary,
    how='left',
    on=['area_latitude', 'area_longitude'],
)
df_with_weather

In [None]:
# Keep the route key, its coordinates and the five daily forecast columns
output_columns = [
    'key',
    'area_latitude',
    'area_longitude',
    'June 05 2025',
    'June 06 2025',
    'June 07 2025',
    'June 08 2025',
    'June 09 2025',
]
df_with_weather = df_with_weather[output_columns]
df_with_weather

In [None]:
import pandas as pd
from pandas_gbq import to_gbq
from google.colab import auth

# Sign in with a Google account
auth.authenticate_user()

# Destination table (in `dataset.table` form) and the project that owns it
table_id = 'routes.df_with_weather'
project_id = 'rock-finder-project'

# Upload the DataFrame; `if_exists` could also be 'replace' or 'append'
to_gbq(
    dataframe=df_with_weather,
    project_id=project_id,
    destination_table=table_id,
    if_exists='fail',
)