In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Load the weather table, then separate it positionally: every column except
# the last becomes the feature matrix X, and the last column becomes the
# target vector y.
# NOTE(review): X therefore still contains the 'Unnamed: 0' column, which looks
# like a row index written out when the CSV was saved -- confirm whether it
# should really be used as a feature.
dataset = pd.read_csv('weather.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [4]:
# Preview the feature matrix (all columns except the last one).
print(X)

[[0 'Barisal' 1949 ... 22.7 90.36 4]
 [1 'Barisal' 1950 ... 22.7 90.36 4]
 [2 'Barisal' 1951 ... 22.7 90.36 4]
 ...
 [21117 'Teknaf' 2011 ... 20.87 92.26 4]
 [21118 'Teknaf' 2012 ... 20.87 92.26 4]
 [21119 'Teknaf' 2013 ... 20.87 92.26 4]]


In [5]:
# Preview the target vector (the last column of the dataset).
print(y)

[1949.01 1950.01 1951.01 ... 2011.12 2012.12 2013.12]


In [6]:
# Summarise column names, non-null counts and dtypes.
# DataFrame.info() writes the summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21120 entries, 0 to 21119
Data columns (total 18 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Unnamed: 0         21120 non-null  int64  
 1   Station Names      21120 non-null  object 
 2   YEAR               21120 non-null  int64  
 3   Month              21120 non-null  int64  
 4   Max Temp           21120 non-null  float64
 5   Min Temp           21120 non-null  float64
 6   Rainfall           21120 non-null  float64
 7   Relative Humidity  21120 non-null  float64
 8   Wind Speed         21119 non-null  float64
 9   Cloud Coverage     21120 non-null  float64
 10  Bright Sunshine    21118 non-null  float64
 11  Station Number     21118 non-null  float64
 12  X_COR              21119 non-null  float64
 13  Y_COR              21119 non-null  float64
 14  LATITUDE           21120 non-null  float64
 15  LONGITUDE          21120 non-null  float64
 16  ALT                211

In [7]:
# Count the missing values in each column (axis=0 sums down the rows).
print(dataset.isnull().sum(axis=0))

Unnamed: 0           0
Station Names        0
YEAR                 0
Month                0
Max Temp             0
Min Temp             0
Rainfall             0
Relative Humidity    0
Wind Speed           1
Cloud Coverage       0
Bright Sunshine      2
Station Number       2
X_COR                1
Y_COR                1
LATITUDE             0
LONGITUDE            0
ALT                  0
Period               0
dtype: int64


In [8]:
# scikit-learn pieces: linear models and feature standardisation.
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
# NOTE(review): neither `scale` nor `linear_model` is referenced in the cells
# visible here (the scaling cell builds its own scaler) -- confirm whether
# they are used further down before removing them.
scale = StandardScaler()

In [9]:
# Preview the feature matrix again; no cell shown so far has reassigned X.
print(X)

[[0 'Barisal' 1949 ... 22.7 90.36 4]
 [1 'Barisal' 1950 ... 22.7 90.36 4]
 [2 'Barisal' 1951 ... 22.7 90.36 4]
 ...
 [21117 'Teknaf' 2011 ... 20.87 92.26 4]
 [21118 'Teknaf' 2012 ... 20.87 92.26 4]
 [21119 'Teknaf' 2013 ... 20.87 92.26 4]]


In [10]:
from sklearn.preprocessing import LabelEncoder

# Encode the target (the dataset's last column, "Period") as integer codes:
# each distinct value gets one code, assigned in sorted order of the values.
# NOTE(review): "Period" looks like a continuous year.month number; label
# encoding treats every distinct value as its own class -- confirm that a
# categorical target is what is intended here.
# NOTE: y is overwritten, so re-running this cell refits `le` on the integer
# codes and loses the mapping back to the original values.
le = LabelEncoder()
le.fit(y)
y = le.transform(y)

In [11]:
# Preview the target after label encoding (values are now integer codes).
print(y)

[ 12  24  36 ... 767 779 791]


In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing (test_size=0.2); the fixed
# random_state=0 keeps the split identical every time the cell is run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)

In [13]:
# Preview the training features produced by the split.
print(X_train)

[[4831 "Cox's Bazar" 1979 ... 21.46 91.98 4]
 [10708 'Khepupara' 1982 ... 21.98 90.22 3]
 [13884 'Mymensingh' 1948 ... 24.75 90.41 19]
 ...
 [9845 'Jessore' 1983 ... 23.17 89.22 7]
 [10799 'Khulna' 1995 ... 22.8 89.58 4]
 [2732 'Chandpur' 1998 ... 23.26 90.67 7]]


In [14]:
# Preview the held-out test features produced by the split.
print(X_test)

[[8085 'Feni' 1995 ... 23.01 91.37 8]
 [5792 'Dhaka' 1980 ... 23.78 90.39 9]
 [18397 'Sitakunda' 2008 ... 22.64 91.64 4]
 ...
 [16321 'Rangpur' 1994 ... 25.72 89.26 34]
 [18394 'Sitakunda' 2005 ... 22.64 91.64 4]
 [5625 'Dhaka' 1996 ... 23.78 90.39 9]]


In [15]:
# Preview the training targets produced by the split.
print(y_train)

[374 419   6 ... 425 564 611]


In [16]:
# Preview the held-out test targets produced by the split.
print(y_test)

[571 389 726 ... 556 690 578]


In [17]:
from sklearn.preprocessing import StandardScaler

# Standardise the numeric features, i.e. every column from index 2 (the third
# column) onward. Columns 0 and 1 ('Unnamed: 0' and 'Station Names') are left
# as they are.
# The scaler learns its statistics from the training split only and the same
# fitted scaler is then applied to the test split.
# NOTE: both arrays are overwritten in place, so running this cell a second
# time would re-fit on (and re-scale) already standardised values.
sc = StandardScaler()
sc.fit(X_train[:, 2:])
X_train[:, 2:] = sc.transform(X_train[:, 2:])
X_test[:, 2:] = sc.transform(X_test[:, 2:])

In [18]:
# Preview the training features after scaling: columns from index 2 onward
# are standardised, columns 0 and 1 are unchanged.
print(X_train)

[[4831 "Cox's Bazar" -0.3945087492951322 ... -1.6272317006135582
  1.3582605509338226 -0.665478348950173]
 [10708 'Khepupara' -0.22375304102352359 ... -1.1825874214767567
  -0.20331059682677896 -0.7395158169082822]
 [13884 'Mymensingh' -2.1589844014350876 ... 1.1859984500788985
  -0.034731893375352874 0.44508367042146446]
 ...
 [9845 'Jessore' -0.16683447159965403 ... -0.16503609037522854
  -1.0905669307816637 -0.4433659450758455]
 [10799 'Khulna' 0.5161883614867804 ... -0.48141759668410755
  -0.7711546505579057 -0.665478348950173]
 [2732 'Chandpur' 0.686944069758389 ... -0.08807842667847451
  0.19595475345292168 -0.4433659450758455]]


In [19]:
# Preview the test features after scaling: columns from index 2 onward are
# standardised, columns 0 and 1 are unchanged.
print(X_test)

[[8085 'Feni' 0.5161883614867804 ... -0.3018497147250138
  0.8170341872213435 -0.3693284771177363]
 [5792 'Dhaka' -0.33759017987126266 ... 0.3565658524583269
  -0.052477020054447036 -0.29529100915962714]
 [18397 'Sitakunda' 1.2561297639970843 ... -0.6182312210338928
  1.0565933973891588 -0.665478348950173]
 ...
 [16321 'Rangpur' 0.45926979206291085 ... 2.01543104769947
  -1.0550766774234628 1.555645689793102]
 [18394 'Sitakunda' 1.0853740557254756 ... -0.6182312210338928
  1.0565933973891588 -0.665478348950173]
 [5625 'Dhaka' 0.5731069309106499 ... 0.3565658524583269
  -0.052477020054447036 -0.29529100915962714]]
