Call data from open-meteo.com api

In [None]:
!pip install openmeteo-requests
!pip install requests-cache retry-requests numpy pandas

Collecting openmeteo-requests
  Downloading openmeteo_requests-1.1.0-py3-none-any.whl (5.5 kB)
Collecting openmeteo-sdk>=1.4.0 (from openmeteo-requests)
  Downloading openmeteo_sdk-1.7.0-py3-none-any.whl (12 kB)
Installing collected packages: openmeteo-sdk, openmeteo-requests
Successfully installed openmeteo-requests-1.1.0 openmeteo-sdk-1.7.0
Collecting requests-cache
  Downloading requests_cache-1.1.1-py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.3/60.3 kB[0m [31m821.4 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting retry-requests
  Downloading retry_requests-2.0.0-py3-none-any.whl (15 kB)
Collecting cattrs>=22.2 (from requests-cache)
  Downloading cattrs-23.2.3-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.5/57.5 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting url-normalize>=1.4 (from requests-cache)
  Downloading url_normalize-1.4.3-py2.py3-none-any.whl (6.8 kB)
Installing collected

In [None]:
import numpy as np
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from datetime import date, timedelta, datetime, timedelta
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler

In [None]:
def get_weather_api(lat, long):
	# Setup the Open-Meteo API client with cache and retry on error
	cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
	retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
	openmeteo = openmeteo_requests.Client(session = retry_session)

	# Make sure all required weather variables are listed here
	# The order of variables in hourly or daily is important to assign them correctly below
	url = "https://archive-api.open-meteo.com/v1/archive"
	params = {
		"latitude": lat,
		"longitude": long,
		"start_date": "2015-01-01",
		"end_date": "2023-10-31",
		"hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "rain", "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "wind_gusts_10m"]
	}
	responses = openmeteo.weather_api(url, params=params)

	# Process first location. Add a for-loop for multiple locations or weather models
	response = responses[0]
	print(f"Coordinates {response.Latitude()}°E {response.Longitude()}°N")
	print(f"Elevation {response.Elevation()} m asl")
	print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
	print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

	# Process hourly data. The order of variables needs to be the same as requested.
	hourly = response.Hourly()
	hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
	hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
	hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
	hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
	hourly_rain = hourly.Variables(4).ValuesAsNumpy()
	hourly_pressure_msl = hourly.Variables(5).ValuesAsNumpy()
	hourly_surface_pressure = hourly.Variables(6).ValuesAsNumpy()
	hourly_cloud_cover = hourly.Variables(7).ValuesAsNumpy()
	hourly_cloud_cover_low = hourly.Variables(8).ValuesAsNumpy()
	hourly_cloud_cover_mid = hourly.Variables(9).ValuesAsNumpy()
	hourly_cloud_cover_high = hourly.Variables(10).ValuesAsNumpy()
	hourly_wind_speed_10m = hourly.Variables(11).ValuesAsNumpy()
	hourly_wind_speed_100m = hourly.Variables(12).ValuesAsNumpy()
	hourly_wind_direction_10m = hourly.Variables(13).ValuesAsNumpy()
	hourly_wind_direction_100m = hourly.Variables(14).ValuesAsNumpy()
	hourly_wind_gusts_10m = hourly.Variables(15).ValuesAsNumpy()

	hourly_data = {"date": pd.date_range(
		start = pd.to_datetime(hourly.Time(), unit = "s"),
		end = pd.to_datetime(hourly.TimeEnd(), unit = "s"),
		freq = pd.Timedelta(seconds = hourly.Interval()),
		inclusive = "left"
	)}
	hourly_data["temperature_2m"] = hourly_temperature_2m
	hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
	hourly_data["dew_point_2m"] = hourly_dew_point_2m
	hourly_data["apparent_temperature"] = hourly_apparent_temperature
	hourly_data["rain"] = hourly_rain
	hourly_data["pressure_msl"] = hourly_pressure_msl
	hourly_data["surface_pressure"] = hourly_surface_pressure
	hourly_data["cloud_cover"] = hourly_cloud_cover
	hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
	hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
	hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
	hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
	hourly_data["wind_speed_100m"] = hourly_wind_speed_100m
	hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
	hourly_data["wind_direction_100m"] = hourly_wind_direction_100m
	hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m

	hourly_dataframe = pd.DataFrame(data = hourly_data)
	return hourly_dataframe


In [None]:
# get dataframe of district 1 with latitude = 10.7807 and longtitude = 106.6994
data_quan_1 = get_weather_api(10.7807, 106.6994)

Coordinates 10.790861129760742°E 106.71087646484375°N
Elevation 18.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


In [None]:
hourly_dataframe = pd.DataFrame(data_quan_1)

In [None]:
hourly_dataframe

Unnamed: 0,date,temperature_2m,relative_humidity_2m,dew_point_2m,apparent_temperature,rain,pressure_msl,surface_pressure,cloud_cover,cloud_cover_low,cloud_cover_mid,cloud_cover_high,wind_speed_10m,wind_speed_100m,wind_direction_10m,wind_direction_100m,wind_gusts_10m,predict_rain,predict_heat,predict_cloud
0,2015-01-01 00:00:00,21.489500,85.151405,18.889500,23.705687,0.0,1013.700012,1011.586792,28.799999,11.0,8.0,47.0,7.594207,13.679999,354.559753,360.000000,13.679999,0,0,0
1,2015-01-01 01:00:00,23.489500,69.282570,17.539499,24.984230,0.0,1014.200012,1012.100098,24.299999,12.0,3.0,39.0,8.311245,11.183201,355.030334,356.308685,19.080000,0,0,0
2,2015-01-01 02:00:00,24.889500,62.902657,17.339500,26.351543,0.0,1014.500000,1012.409302,27.000000,7.0,1.0,67.0,7.928178,10.464798,2.602512,3.945108,20.160000,0,0,0
3,2015-01-01 03:00:00,26.139500,59.517963,17.639500,28.154537,0.0,1014.599976,1012.517883,27.000000,1.0,0.0,87.0,7.636753,9.826088,8.130019,8.426887,19.799999,0,0,0
4,2015-01-01 04:00:00,27.339500,56.162460,17.839500,30.580215,0.0,1014.000000,1011.927429,27.900002,2.0,1.0,85.0,6.519877,8.311245,6.340100,4.969664,20.160000,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77419,2023-10-31 19:00:00,24.789499,98.516823,24.539499,30.363483,0.0,1009.799988,1007.718262,24.900000,23.0,1.0,12.0,5.959060,12.758432,334.983124,343.610382,10.080000,0,0,0
77420,2023-10-31 20:00:00,24.639500,97.926659,24.289499,30.209759,0.0,1009.400024,1007.318298,1.500000,1.0,1.0,0.0,4.896529,10.853866,342.897186,354.289490,8.280000,0,0,0
77421,2023-10-31 21:00:00,24.539499,97.339287,24.089500,30.015854,0.0,1009.500000,1007.417358,18.900000,2.0,0.0,57.0,4.680000,9.255571,360.000000,13.495748,6.840000,0,0,0
77422,2023-10-31 22:00:00,24.539499,96.176743,23.889500,29.657166,0.0,1009.900024,1007.816528,30.300001,1.0,1.0,96.0,6.287130,9.504272,23.629398,37.304039,7.559999,0,0,0


In [None]:
weather_data = hourly_dataframe.iloc[:,:].values

In [None]:
# labeling rain data
# no rain => 0 || rainy => 1 || heavy rain => 2
predict_rain = []

for element in weather_data:
  rain = element[5]
  rain_label = -1
  if rain < 0.3:
    rain_label = 0
  else: rain_label = 1
  predict_rain.append(rain_label)

In [None]:
# check if exist any element mislabeled in rain data
nprain = np.array(predict_rain)
print('Rain label:', np.unique(nprain))
unique, counts = np.unique(nprain, return_counts=True)

infor_rain = dict(zip(unique, counts))
data_len = len(weather_data)
for i in range(2):
  print(f'Number of sample in label {i}:', infor_rain[i], '||\t||', ' Account', counts[i]/data_len*100, '% of total data.')

Rain label: [0 1]
Number of sample in label 0: 66155 ||	||  Account 85.44508162843563 % of total data.
Number of sample in label 1: 11269 ||	||  Account 14.554918371564373 % of total data.


In [None]:
# labeling heat data
# cool => 0 || hot => 1 || scorching => 2
predict_heat = []

for element in weather_data:
  heat = element[4]
  heat_label = -1
  if heat < 35:
    heat_label = 0
  elif heat < 37:
    heat_label = 1
  else: heat_label = 2
  predict_heat.append(heat_label)

In [None]:
# check if exist any element mislabeled in heat data
npheat = np.array(predict_heat)
print('Heat label:', np.unique(npheat))
unique, counts = np.unique(npheat, return_counts=True)

infor_heat = dict(zip(unique, counts))
for i in range(3):
  print(f'Number of sample in label {i}:', infor_heat[i], '||\t||', ' Account', counts[i]/data_len*100, '% of total data.')

Heat label: [0 1 2]
Number of sample in label 0: 65639 ||	||  Account 84.77862161603638 % of total data.
Number of sample in label 1: 7094 ||	||  Account 9.162533581318453 % of total data.
Number of sample in label 2: 4691 ||	||  Account 6.058844802645175 % of total data.


In [None]:
# labeling cloud data
# less cloud => 0 || much cloud => 1
predict_cloud = []

for element in weather_data:
  cloud = element[8]
  cloud_label = -1
  if cloud < 50:
    cloud_label = 0
  else: cloud_label = 1
  predict_cloud.append(cloud_label)

In [None]:
# check if exist any element mislabeled in cloud data
npcloud = np.array(predict_cloud)
print('cloud label:', np.unique(npcloud))
unique, counts = np.unique(npcloud, return_counts=True)

infor_cloud = dict(zip(unique, counts))
for i in range(2):
  print(f'Number of sample in label {i}:', infor_cloud[i], '||\t||', ' Account', counts[i]/data_len*100, '% of total data.')

cloud label: [0 1]
Number of sample in label 0: 50865 ||	||  Account 65.69668319900805 % of total data.
Number of sample in label 1: 26559 ||	||  Account 34.30331680099194 % of total data.


In [None]:
hourly_dataframe['predict_rain'] = nprain
hourly_dataframe['predict_heat'] = npheat
hourly_dataframe['predict_cloud'] = npcloud

In [None]:
hourly_dataframe['date'] = hourly_dataframe['date'].astype('string')

In [None]:
datetime = datetime.fromisoformat(hourly_dataframe.iloc[15000, 0])

In [None]:
# define a function to get feature date and target date
def get_date(datetime):
  feature_date = []
  target_date = []
  date = str(datetime)
  for i in range(1, 169):
    # calculate time and add it to list
    last_year_before = datetime.fromisoformat(date) - timedelta(days=365, hours=i)
    last_year_after = datetime.fromisoformat(date) + timedelta(hours=i) - timedelta(days=365)
    last_week_time = datetime.fromisoformat(date) - timedelta(hours=i)
    next_week_time = datetime.fromisoformat(date) + timedelta(hours=i)
    feature_date.append(str(last_year_before))
    feature_date.append(str(last_year_after))
    feature_date.append(str(last_week_time))
    target_date.append(str(next_week_time))
  return feature_date, target_date

In [None]:
feature_date, target_date = get_date(datetime)

In [None]:
print(len(feature_date), len(target_date))

504 168


In [None]:
datetime

datetime.datetime(2016, 9, 17, 0, 0)

In [None]:
print(feature_date[0],feature_date[1],feature_date[2])

2015-09-17 23:00:00 2015-09-18 01:00:00 2016-09-16 23:00:00


In [None]:
columns_rain=['relative_humidity_2m', 'apparent_temperature', 'surface_pressure', 'rain', 'cloud_cover']
columns_heat=['relative_humidity_2m', 'wind_gusts_10m', 'apparent_temperature', 'temperature_2m']
columns_cloud=['rain', 'cloud_cover', 'cloud_cover_low', 'cloud_cover_mid', 'cloud_cover_high']
target_rain=['predict_rain']
target_heat=['predict_heat']
target_cloud=['predict_cloud']

tuần trước năm trước (168h) + tuần sau năm trước (168h) + tuần sau năm này (168h)</br>
=> 1 tuần sau năm này (168h)

In [None]:
def get_data(feature_time, target_time, feature, target):
  data_feat = []
  target_feat = []

  # iterate through feature_time
  for time in feature_time:
    # append found data in list (feature got include week before-later of last year, one week before of this year)
    data = hourly_dataframe[feature].to_numpy()[hourly_dataframe['date'].to_numpy() == time]
    data_feat.append(data)

  # iterate through target_time
  for time in target_time:
    # append found data in list (feature got include one week after of this year)
    data = hourly_dataframe[target].to_numpy()[hourly_dataframe['date'].to_numpy() == time]
    target_feat.append(data)

  # convert the two list to numpy array and flatten them
  data_feat = np.array(data_feat).flatten()
  target_feat = np.array(target_feat).flatten()

  return data_feat, target_feat

In [None]:
feature_data, target_data = get_data(feature_date, target_date, columns_heat, target_heat)

In [None]:
print(feature_data.shape) # 2016 mean 504*4 -> 504 hours total * 4 feature (because i flatten it)
print(target_data.shape)

(2016,)
(168,)


In [None]:
def get_all_of_type(feature_type, target_type):
  feature = []
  target = []
  for i in range(len(hourly_dataframe)):
    date = datetime.fromisoformat(hourly_dataframe.iloc[i, 0])
    feature_date, target_date = get_date(date)
    feature_data, target_data = get_data(feature_date, target_date, feature_type, target_type)
    feature.append(feature_data)
    target.append(target_data)
  feature = np.array(feature)
  target = np.array(target)
  return feature, target


In [None]:
#heat_feature, heat_target = get_all_of_type(columns_heat, target_heat)

In [None]:
print(datetime.fromisoformat(hourly_dataframe.iloc[8783, 0]))

2016-01-01 23:00:00


In [None]:
print(hourly_dataframe[columns_heat].iloc[8784-365*24-1:8784-365*24-169])

       relative_humidity_2m  wind_gusts_10m  apparent_temperature  \
23                74.365158        9.360000             21.157446   
24                73.224449       12.959999             21.143343   
25                61.944687       18.359999             22.991415   
26                59.036236       20.160000             24.965498   
27                57.523548       20.519999             27.074623   
...                     ...             ...                   ...   
77274             94.477852       10.799999             30.268993   
77275             95.326515        9.360000             30.236565   
77276             96.470146        8.640000             30.163628   
77277             95.604889        6.120000             30.331520   
77278             96.175339        5.400000             29.808720   

       temperature_2m  
23          20.039499  
24          20.289499  
25          22.389500  
26          24.089500  
27          25.539499  
...               ...  
772

In [None]:
hourly_dataframe.index[hourly_dataframe['date'].values == str(datetime.fromisoformat('2023-10-01 23:00:00') + timedelta(days=i))]

Int64Index([76727], dtype='int64')

In [None]:
def get_train_test_data(start, end, feature_type, target_type):
  feature = []
  target = []
  start = datetime.fromisoformat(start)
  end = datetime.fromisoformat(end)
  for i in range((end-start).days+1):
    print('----------------------------------')
    print(f'Get {i}...')
    date = start + timedelta(days=i)
    print(f'Checking date:', date)

    # get prev week
    last_week_end = date - timedelta(hours=1)
    last_week_start = date - timedelta(hours=24*7)

    # get prev year
    last_year_start = date - timedelta(days=365, hours=24*7)
    last_year_end = date - timedelta(days=365) + timedelta(hours=24*7)

    # get index of those feature
    last_week_start_index = np.where(hourly_dataframe['date'].values == str(last_week_start))
    last_week_end_index = np.where(hourly_dataframe['date'].values == str(last_week_end))
    last_year_start_index = np.where(hourly_dataframe['date'].values == str(last_year_start))
    last_year_end_index = np.where(hourly_dataframe['date'].values == str(last_year_end))

    # get index of those target
    next_week_index = np.where(hourly_dataframe['date'].values == str(date))

    # error checking
    print('Check indexing:', last_year_start_index[0],last_year_end_index[0],last_week_start_index[0],last_week_end_index[0],next_week_index[0])

    # get data of those feature
    feature_year = hourly_dataframe.loc[int(last_year_start_index[0]):int(last_year_end_index[0]), feature_type].to_numpy(copy=True).flatten()
    feature_week = hourly_dataframe.loc[int(last_week_start_index[0]):int(last_week_end_index[0]), feature_type].to_numpy(copy=True).flatten()

    # target
    target_week = hourly_dataframe.loc[int(next_week_index[0]), target_type].to_numpy(copy=True).flatten()

    feature_np = np.concatenate((feature_year, feature_week), axis=None)
    print('Feature shape:', feature_np.shape)
    print('Target shape:', target_week.shape)
    #target_temp.clear()
    feature.append(feature_np)
    target.append(target_week)
    print('Done!')

  return feature, target

In [None]:
x_heat, y_heat = get_train_test_data('2016-05-01 00:00:00', '2023-10-01 23:00:00', columns_heat, target_heat)
x_heat = np.array(x_heat)
y_heat = np.array(y_heat).astype('int').reshape(-1,)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Target shape: (1,)
Done!
----------------------------------
Get 1996...
Checking date: 2021-10-18 00:00:00
Check indexing: [50640] [50976] [59400] [59567] [59568]
Feature shape: (2020,)
Target shape: (1,)
Done!
----------------------------------
Get 1997...
Checking date: 2021-10-19 00:00:00
Check indexing: [50664] [51000] [59424] [59591] [59592]
Feature shape: (2020,)
Target shape: (1,)
Done!
----------------------------------
Get 1998...
Checking date: 2021-10-20 00:00:00
Check indexing: [50688] [51024] [59448] [59615] [59616]
Feature shape: (2020,)
Target shape: (1,)
Done!
----------------------------------
Get 1999...
Checking date: 2021-10-21 00:00:00
Check indexing: [50712] [51048] [59472] [59639] [59640]
Feature shape: (2020,)
Target shape: (1,)
Done!
----------------------------------
Get 2000...
Checking date: 2021-10-22 00:00:00
Check indexing: [50736] [51072] [59496] [59663] [59664]
Feature shape: (2020,)
Targe

In [None]:
print(x_heat.shape)
print(y_heat.shape)

(2710, 2020)
(2710,)


In [None]:
scaler = StandardScaler()
scaler.fit_transform(x_heat)
uniq_heat, count_heat = np.unique(y_heat, return_counts=True)
print(dict(zip(uniq_heat, count_heat)))

{0: 2705, 1: 5}


In [None]:
x_train_heat, x_test_heat, y_train_heat, y_test_heat = train_test_split(x_heat, y_heat, test_size=0.2, random_state=0)

In [None]:
heat_model = SVC()
heat_model.fit(x_train_heat, y_train_heat)
y_pred_heat = heat_model.predict(x_test_heat)
print(accuracy_score(y_test_heat, y_pred_heat))
print(f1_score(y_test_heat, y_pred_heat))

0.996309963099631
0.0


In [None]:
confusion_matrix(y_test_heat, y_pred_heat)

array([[540,   0],
       [  2,   0]])

In [None]:
x_rain, y_rain = get_train_test_data('2016-05-01 00:00:00', '2023-10-01 23:00:00', columns_rain, target_rain)
x_rain = np.array(x_rain)
y_rain = np.array(y_rain).astype('int').reshape(-1,)
x_train_rain, x_test_rain, y_train_rain, y_test_rain = train_test_split(x_rain, y_rain, test_size=0.2, random_state=0)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Target shape: (1,)
Done!
----------------------------------
Get 1996...
Checking date: 2021-10-18 00:00:00
Check indexing: [50640] [50976] [59400] [59567] [59568]
Feature shape: (2525,)
Target shape: (1,)
Done!
----------------------------------
Get 1997...
Checking date: 2021-10-19 00:00:00
Check indexing: [50664] [51000] [59424] [59591] [59592]
Feature shape: (2525,)
Target shape: (1,)
Done!
----------------------------------
Get 1998...
Checking date: 2021-10-20 00:00:00
Check indexing: [50688] [51024] [59448] [59615] [59616]
Feature shape: (2525,)
Target shape: (1,)
Done!
----------------------------------
Get 1999...
Checking date: 2021-10-21 00:00:00
Check indexing: [50712] [51048] [59472] [59639] [59640]
Feature shape: (2525,)
Target shape: (1,)
Done!
----------------------------------
Get 2000...
Checking date: 2021-10-22 00:00:00
Check indexing: [50736] [51072] [59496] [59663] [59664]
Feature shape: (2525,)
Targe

In [None]:
scaler.fit_transform(x_rain)
uniq_rain, count_rain = np.unique(y_rain, return_counts=True)
print(dict(zip(uniq_rain, count_rain)))

{0: 2527, 1: 183}


In [None]:
rain_model = SVC()
rain_model.fit(x_train_rain, y_train_rain)
y_pred_rain = rain_model.predict(x_test_rain)
print(accuracy_score(y_test_rain, y_pred_rain))
print(f1_score(y_test_rain, y_pred_rain))

0.9391143911439115
0.0


In [None]:
confusion_matrix(y_test_rain, y_pred_rain)

array([[509,   0],
       [ 33,   0]])

In [None]:
x_cloud, y_cloud = get_train_test_data('2016-05-01 00:00:00', '2023-10-01 23:00:00', columns_cloud, target_cloud)
x_cloud = np.array(x_cloud)
y_cloud = np.array(y_cloud).astype('int').reshape(-1,)
x_train_cloud, x_test_cloud, y_train_cloud, y_test_cloud = train_test_split(x_cloud, y_cloud, test_size=0.2, random_state=0)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Target shape: (1,)
Done!
----------------------------------
Get 1996...
Checking date: 2021-10-18 00:00:00
Check indexing: [50640] [50976] [59400] [59567] [59568]
Feature shape: (2525,)
Target shape: (1,)
Done!
----------------------------------
Get 1997...
Checking date: 2021-10-19 00:00:00
Check indexing: [50664] [51000] [59424] [59591] [59592]
Feature shape: (2525,)
Target shape: (1,)
Done!
----------------------------------
Get 1998...
Checking date: 2021-10-20 00:00:00
Check indexing: [50688] [51024] [59448] [59615] [59616]
Feature shape: (2525,)
Target shape: (1,)
Done!
----------------------------------
Get 1999...
Checking date: 2021-10-21 00:00:00
Check indexing: [50712] [51048] [59472] [59639] [59640]
Feature shape: (2525,)
Target shape: (1,)
Done!
----------------------------------
Get 2000...
Checking date: 2021-10-22 00:00:00
Check indexing: [50736] [51072] [59496] [59663] [59664]
Feature shape: (2525,)
Targe

In [None]:
scaler.fit_transform(x_cloud)
uniq_cloud, count_cloud = np.unique(y_cloud, return_counts=True)
print(dict(zip(uniq_cloud, count_cloud)))
remove_index = np.where(y_cloud==0)[0:1055]
x_cloud_new = np.delete(x_cloud, remove_index, 0)
y_cloud_new = np.delete(y_cloud, remove_index, 0)
uniq_cloud_new, count_cloud_new = np.unique(y_cloud_new, return_counts=True)
print(dict(zip(uniq_cloud_new, count_cloud_new)))

{0: 1882, 1: 828}
{1: 828}


In [None]:
cloud_model = SVC()
cloud_model.fit(x_train_cloud, y_train_cloud)
y_pred_cloud = cloud_model.predict(x_test_cloud)
print(accuracy_score(y_test_cloud, y_pred_cloud))
print(f1_score(y_test_cloud,y_pred_cloud))

0.7878228782287823
0.4843049327354261


In [None]:
confusion_matrix(y_test_cloud, y_pred_cloud)

array([[373,  23],
       [ 92,  54]])