In [1]:
import pandas as pd
import pyowm
from geopy.geocoders import Nominatim
from meteostat import Stations, Daily
from final_files.config import *
from sqlalchemy import create_engine

In [2]:
def query_AI(table, subname='', log=False):
    db_connection_str = f"mysql+pymysql://{config_ai['user']}:{config_ai['password']}@{config_ai['host']}:{config_ai['port']}/{config_ai['database']}"
    engine = create_engine(db_connection_str)
    if subname:
        fullname = table+'_'+subname
    else:
        fullname = table
    df = pd.read_sql(f'SELECT * FROM daytriptour_ai.{fullname}', con=engine)
    if log: 
        print(f'READ DB_AI: {fullname}')
    return df

def query_dtt(table):
    db_connection_str = f"mysql+pymysql://{config_dtt['user']}:{config_dtt['password']}@{config_dtt['host']}:{config_dtt['port']}/{config_dtt['database']}"
    engine = create_engine(db_connection_str)
    df = pd.read_sql(f'SELECT * FROM dtt.{table}', con=engine)
    print(f'READ DB_DTT: {table}')
    return df

In [80]:
df_product = query_dtt('dtt_product')[['product_id', 'province_id']]
df_province = query_dtt('dtt_area')
df_province['province_en'] = df_province['province_en'].str.replace('-', ' ')
df_province['province_th'] = df_province['province_th'].str.replace(' ', '')
df_province = df_province[['province_id', 'province_th', 'province_en']].drop_duplicates('province_id')
df_clean = query_AI('clean_data')[['product_id', 'order_departure_date']]

df_clean = df_clean.merge(df_product, on='product_id', how='left')
df_clean = df_clean.merge(df_province, on='province_id', how='left')

READ DB_DTT: dtt_product
READ DB_DTT: dtt_area


In [81]:
df_clean

Unnamed: 0,product_id,order_departure_date,province_id,province_th,province_en
0,121,2018-10-29,83,ภูเก็ต,Phuket
1,121,2018-10-30,83,ภูเก็ต,Phuket
2,121,2018-11-23,83,ภูเก็ต,Phuket
3,121,2018-09-15,83,ภูเก็ต,Phuket
4,121,2018-09-29,83,ภูเก็ต,Phuket
...,...,...,...,...,...
5512,202,2023-01-11,83,ภูเก็ต,Phuket
5513,202,2023-01-16,83,ภูเก็ต,Phuket
5514,202,2023-05-13,83,ภูเก็ต,Phuket
5515,202,2023-01-16,83,ภูเก็ต,Phuket


In [112]:
# Retrieve the list of weather stations in Thailand
stations = Stations()
stations = stations.region("TH").fetch()
stations = stations.reset_index(drop=False)
# Display the station IDs and names

df_stations = stations[['id', 'name', 'latitude', 'longitude']]
df_stations = df_stations.rename(columns={'id': 'stations_id', 'name': 'stations_name'})

# Create a geocoder object
geolocator = Nominatim(user_agent='my_geocoder')

# Define a function to get the province and country from coordinates
def get_location_info(latitude, longitude):
    location = geolocator.reverse((latitude, longitude), exactly_one=True)
    if location is not None:
        address = location.raw.get('address', {})
        province = address.get('state', '')
        country = address.get('country', '')
        return province, country
    else:
        return '', ''
df_stations[['province', 'country']] = df_stations.apply(lambda row: pd.Series(get_location_info(row['latitude'], row['longitude'])), axis=1)
df_stations['province'] = df_stations['province'].str.replace('จังหวัด', '')
df_stations.loc[df_stations['stations_name'] == 'Mae Hong Son', 'province'] = 'แม่ฮ่องสอน'
df_stations.loc[df_stations['stations_name'] == 'Chiang Mai', 'province'] = 'เชียงใหม่'
df_stations.loc[df_stations['stations_name'] == 'Lamphun', 'province'] = 'ลำพูน'
df_stations.loc[df_stations['stations_name'] == 'Nakhon Phanom Agromet / Ban Namuang', 'province'] = 'นครพนม'
df_stations.loc[df_stations['stations_name'] == 'Bangkok Pilot', 'province'] = 'กรุงเทพมหานคร'
df_stations.loc[df_stations['stations_name'] == 'Aranyaprathet', 'province'] = 'สระแก้ว'
df_stations.loc[df_stations['stations_name'] == 'Taphao / Ban Khlong Kao', 'province'] = 'สมุทรสงคราม'
df_stations

GeocoderUnavailable: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /reverse?lat=18.7667&lon=100.7667&format=json&addressdetails=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))

In [87]:
df_stations = df_stations.merge(df_province, left_on='province', right_on='province_th', how='left')
df_stations

Unnamed: 0,stations_id,stations_name,latitude,longitude,province,country,province_id,province_th,province_en
0,48300,Mae Hong Son,19.3000,97.8333,แม่ฮ่องสอน,မြန်မာ,58,แม่ฮ่องสอน,Mae Hong Son
1,48302,Doi Ang Khang / Ban Palong No Lae,19.9328,99.0453,เชียงใหม่,ประเทศไทย,50,เชียงใหม่,Chiang Mai
2,48303,Chiang Rai,19.9167,99.8333,เชียงราย,ประเทศไทย,57,เชียงราย,Chiang Rai
3,48304,Chaing Rai Agromet / Ban Pa Ko,19.8725,99.7792,เชียงราย,ประเทศไทย,57,เชียงราย,Chiang Rai
4,48307,Thung Chang / Ban Fueai Lung,19.4081,100.8822,น่าน,ประเทศไทย,55,น่าน,Nan
...,...,...,...,...,...,...,...,...,...
118,48583,Narathiwat,6.4167,101.8167,นราธิวาส,ประเทศไทย,96,นราธิวาส,Narathiwat
119,KQFN0,Taphao / Ban Khlong Kao,12.4200,100.0000,สมุทรสงคราม,,75,สมุทรสงคราม,Samut Songkhram
120,VTCB0,Chiang Kham / Ban Waen Phatthana,19.5000,100.2833,พะเยา,ประเทศไทย,56,พะเยา,Phayao
121,VTPL0,Phetchabun / Ban Bung Nua,16.8167,101.2500,เพชรบูรณ์,ประเทศไทย,67,เพชรบูรณ์,Phetchabun


In [111]:
df_stations.columns

Index(['stations_id', 'stations_name', 'latitude', 'longitude', 'province',
       'country', 'province_id', 'province_th', 'province_en'],
      dtype='object')

In [95]:
def get_province_forecast(api_key, df_clean, df_province):
    # Openweathermap API key
    owm = pyowm.OWM(api_key)

    # Create an empty DataFrame to store the combined forecast
    combined_forecast_df = pd.DataFrame(columns=['province', 'date', 'rainfall'])

    # List of places to forecast
    places = df_clean['province_en'].unique()

    # Iterate over each place and retrieve the forecast
    for place in places:
        mgr = owm.weather_manager()
        forecast = mgr.forecast_at_place(place, '3h')

        # Create a list to store forecast data
        forecast_data = []

        # Store the weather forecast data
        for weather in forecast.forecast:
            forecast_data.append({
                'Time': weather.reference_time('iso'),
                'Status': weather.status,
                'Rain volume': weather.rain
            })

        place_forecast_df = pd.DataFrame(forecast_data)

        # Extract the date and time from the 'Time' column
        place_forecast_df['DateTime'] = pd.to_datetime(place_forecast_df['Time'])
        place_forecast_df['Date'] = place_forecast_df['DateTime'].dt.date
        place_forecast_df['Time'] = place_forecast_df['DateTime'].dt.time

        # Group the data by date and perform voting for rainfall status
        place_forecast_df = (
            place_forecast_df[place_forecast_df['Time'].between(pd.to_datetime('06:00').time(), pd.to_datetime('18:00').time())]
            .groupby('Date')['Rain volume']
            .apply(lambda x: 1 if any(x) else 0)
            .reset_index()
        )
        place_forecast_df.columns = ['date', 'rainfall']

        # Add the 'Place' column to the forecast DataFrame
        place_forecast_df['province'] = place

        # Append the place forecast to the combined forecast DataFrame
        combined_forecast_df = pd.concat([combined_forecast_df, place_forecast_df], ignore_index=True)

    # Convert 'Date' column to datetime data type
    combined_forecast_df['date'] = pd.to_datetime(combined_forecast_df['date'])

    # Select the forecast for the next 3 days
    today = pd.to_datetime('today').date()
    next_3_days = pd.date_range(start=today, periods=3)

    selected_forecast = combined_forecast_df[combined_forecast_df['date'].isin(next_3_days)]

    # Group the data by province and perform voting for rainfall status
    province_forecast = selected_forecast.groupby('province')['rainfall'].apply(lambda x: x.mode()[0]).reset_index()

    province_forecast.columns = ['province', 'rainfall_forecast']

    province_forecast = pd.merge(province_forecast, df_province[['province_en', 'province_id']], left_on='province', right_on='province_en', how='left')

    return province_forecast

In [96]:
api_key = '90890dd21edc71db1a02b25b51bd6456'
province_forecast = get_province_forecast(api_key, df_clean, df_province)
province_forecast

Unnamed: 0,province,rainfall_forecast,province_en,province_id
0,Bangkok,1,Bangkok,10
1,Chiang Mai,1,Chiang Mai,50
2,Chon Buri,1,Chon Buri,20
3,Krabi,1,Krabi,81
4,Phang nga,1,Phang nga,82
5,Phuket,1,Phuket,83
6,Surat Thani,1,Surat Thani,84


In [98]:
# Function to get rainfall for a specific date and province with weighted voting
def get_rainfall(date, province_id):
    # Get the weather stations for the province
    stations = df_stations[df_stations['province_id'] == province_id]['stations_id'].tolist()

    if len(stations) > 1:
        # If multiple stations, use weighted voting to determine rainfall
        votes = []
        weights = []
        for station in stations:
            data = Daily(station, date, date)
            data = data.fetch()
            if len(data) > 0:
                prcp = data['prcp'].iloc[0]
                if prcp > 0:
                    votes.append(1)
                else:
                    votes.append(0)
                weights.append(prcp)

        if len(votes) > 0:
            weighted_vote = sum([vote * weight for vote, weight in zip(votes, weights)])
            weighted_sum = sum(weights)
            if weighted_sum != 0:
                rainfall = 1 if weighted_vote / weighted_sum > 0 else 0
            else:
                rainfall = 0
        else:
            rainfall = 0
    elif len(stations) == 1:
        # If single station, retrieve rainfall directly
        data = Daily(stations[0], date, date)
        data = data.fetch()
        rainfall = 1 if len(data) > 0 and data['prcp'].iloc[0] > 0 else 0
    else:
        rainfall = 0

    return rainfall

In [100]:
df_clean['rainfall'] = df_clean.apply(lambda row: get_rainfall(row['order_departure_date'], row['province_id']), axis=1)
df_clean

Unnamed: 0,product_id,order_departure_date,province_id,province_th,province_en,rainfall
0,121,2018-10-29,83,ภูเก็ต,Phuket,1
1,121,2018-10-30,83,ภูเก็ต,Phuket,1
2,121,2018-11-23,83,ภูเก็ต,Phuket,1
3,121,2018-09-15,83,ภูเก็ต,Phuket,1
4,121,2018-09-29,83,ภูเก็ต,Phuket,1
...,...,...,...,...,...,...
5512,202,2023-01-11,83,ภูเก็ต,Phuket,1
5513,202,2023-01-16,83,ภูเก็ต,Phuket,0
5514,202,2023-05-13,83,ภูเก็ต,Phuket,0
5515,202,2023-01-16,83,ภูเก็ต,Phuket,0


In [102]:
history_rainfall_product = df_clean.groupby('product_id')['rainfall'].agg(rainy_days='sum', non_rainy_days=lambda x: len(x) - sum(x)).reset_index()
history_rainfall_product = pd.merge(history_rainfall_product, df_product[['product_id', 'province_id']], on='product_id', how='left')
history_rainfall_product

Unnamed: 0,product_id,rainy_days,non_rainy_days,province_id
0,29,8,13,83
1,33,1,2,83
2,34,182,26,83
3,35,2,12,81
4,38,48,40,83
...,...,...,...,...
98,197,2,0,83
99,198,4,2,20
100,199,4,0,10
101,200,5,0,10


In [105]:
province_forecast

Unnamed: 0,province,rainfall_forecast,province_en,province_id
0,Bangkok,1,Bangkok,10
1,Chiang Mai,1,Chiang Mai,50
2,Chon Buri,1,Chon Buri,20
3,Krabi,1,Krabi,81
4,Phang nga,1,Phang nga,82
5,Phuket,1,Phuket,83
6,Surat Thani,1,Surat Thani,84


In [109]:
def get_recommended_weather_products(province_forecast, history_rainfall_product):
    # Merge the two DataFrames based on 'province_id'
    merged_data = pd.merge(province_forecast, history_rainfall_product, on='province_id', how='left')

    # Filter products based on the rainfall forecast
    rainy_products = merged_data.loc[merged_data['rainfall_forecast'] == 1]
    non_rainy_products = merged_data.loc[merged_data['rainfall_forecast'] == 0]

    # Check if rainy_days is greater than non_rainy_days for each product
    rainy_products = rainy_products.loc[rainy_products['rainy_days'] >= rainy_products['non_rainy_days']]
    non_rainy_products = non_rainy_products.loc[non_rainy_products['rainy_days'] <= non_rainy_products['non_rainy_days']]

    # Combine product_ids from rainy and non-rainy products
    recommended_weather_products = rainy_products['product_id'].tolist() + non_rainy_products['product_id'].tolist()
    recommended_weather_products = [int(x) for x in recommended_weather_products]

    return recommended_weather_products

In [110]:
recommended_weather_products = get_recommended_weather_products(province_forecast, history_rainfall_product)
print(recommended_weather_products)

[127, 138, 149, 167, 199, 200, 166, 198, 34, 38, 39, 46, 49, 52, 56, 58, 60, 69, 75, 79, 81, 84, 90, 115, 121, 125, 128, 131, 150, 156, 173, 184, 195, 197, 202]


In [113]:
df_product = query_dtt('dtt_product')
ls = df_product[df_product['is_active']==0]['product_id'].tolist()

READ DB_DTT: dtt_product


In [119]:
df_product.query(f"product_id not in {recommended_weather_products} & is_active == 0")

Unnamed: 0,product_id,product_code,product_slug,product_thumbnail,age_adult,age_child,age_elder,age_infant,ordering,trash,...,height_elder,is_height_infant,height_infant,country_id,postal_code,set_dayoff,is_approved,date_approve,approver,view_count
0,26,test,test,uploads/products/ph3_9ba8dc5434674bf391dc1a3ac...,,,,,,0,...,,0,,218,,0,1,2017-11-14 19:25:00,23,0
1,25,PKNIGHT,phuket-night-life-tour,uploads/products/q_4a5e7ff234133e3cbc674935c7d...,,,,,,0,...,,0,,218,,0,1,2017-10-18 22:58:00,23,0
2,24,DIVING_FUN01,best-diving,uploads/products/phuket-diving_dce2e9561b7a059...,,,,,,1,...,,0,,218,,0,1,2017-10-18 22:50:00,23,0
3,22,BEACH_A01,beautiful_beach,uploads/products/home_21a0861bd07b72836646dabe...,,,,,,1,...,,0,,218,,0,1,2017-10-18 22:20:00,23,0
4,23,BEACH_A02,most_beautiful_beach,uploads/products/phuket_930e20350162e12e0cf304...,,,,,,1,...,,0,,218,,0,1,2017-10-18 22:21:00,23,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,191,PNA009,james-bond-and-khai-island-from-khaolak,uploads/products/james_bond_and_khai_island_fr...,12-70,4-11,,0-3,,0,...,,0,,218,,0,1,2020-01-06 12:52:36,23,1
170,192,HKT083,vibe-boat-club,uploads/products/vibe_boat_club_3_bb20c39b2bf8...,16+,,,,,1,...,,0,,218,,0,1,2020-01-06 14:08:39,23,3
172,194,BKK026,the-hidden-of-bangkok,uploads/products/e06aab4e-fb09-41e2-ac46-2798b...,11+,2-11,,0-2,,0,...,,0,,218,,0,1,2020-05-06 15:36:32,23,0
173,203,PAT015,underwater-world-pattaya,uploads/products/underwater_world_pattaya_1_th...,130 ซม.+,90-130 ซม.,,&lt; 90 ซม.,,0,...,,0,,218,,0,1,2022-10-09 15:22:59,23,0


In [118]:
len(recommended_weather_products)

35