In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np
import pandas as pd

data = pd.read_csv('./data/data.csv')
data

# Check for missing values
missing_values = data.isnull().sum()
print("Missing values in each column:\n", missing_values)

# Drop rows with missing values (if any)
data = data.dropna()

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cloud Cover', 'Season', 'Location', 'Weather Type']

for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Normalize numerical features
numerical_columns = ['Temperature', 'Humidity', 'Wind Speed', 'Precipitation (%)', 
                     'Atmospheric Pressure', 'UV Index', 'Visibility (km)']

scaler = StandardScaler()
data[numerical_columns] = scaler.fit_transform(data[numerical_columns])

# Separate features and target
X = data.drop('Weather Type', axis=1)
y = data['Weather Type']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Missing values in each column:
 Temperature             0
Humidity                0
Wind Speed              0
Precipitation (%)       0
Cloud Cover             0
Atmospheric Pressure    0
UV Index                0
Season                  0
Visibility (km)         0
Location                0
Weather Type            0
dtype: int64


In [3]:
label_encoders

{'Cloud Cover': LabelEncoder(),
 'Season': LabelEncoder(),
 'Location': LabelEncoder(),
 'Weather Type': LabelEncoder()}

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Define the neural network architecture
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(np.unique(y)), activation='softmax'))

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
test_accuracy = accuracy * 100

# Make predictions
predictions = model.predict(X_test)
predicted_classes = np.argmax(predictions, axis=1)

# Display the first few predictions
print(f"Test Accuracy: {test_accuracy:.2f}%")
print("Predicted classes for the first 10 test samples:", predicted_classes[:10])
print("True classes for the first 10 test samples:", y_test[:10].values)


Test Accuracy: 90.68%
Predicted classes for the first 10 test samples: [3 3 3 2 3 2 3 1 2 2]
True classes for the first 10 test samples: [3 3 3 2 3 2 3 1 3 2]


In [7]:
y_test

4111     3
10607    3
7372     3
11786    2
12227    3
        ..
2543     1
96       1
2474     2
2522     2
3393     0
Name: Weather Type, Length: 2640, dtype: int32

In [None]:
#Kolom numerik yang akan diperiksa outliers nya
numeric_columns = ['Temperature', 'Humidity', 'Wind Speed','Precipitation (%)','Atmospheric Pressure','UV Index','Visibility (km)']

# Menampilkan boxplot untuk masing-masing kolom
plt.figure(figsize=(10, 5))
for i, column in enumerate(numeric_columns, 1):
    plt.subplot(2, 4, i)
    sns.boxplot(y=data[column])
    plt.title(f'Boxplot of {column}')
plt.tight_layout()
plt.show()

In [None]:

#Fungsi untuk menghapus outlier
def remove_outliers_iqr(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]

#Menghapus outliers dari masing-masing kolom
for column in numeric_columns:
    data = remove_outliers_iqr(data, column)

In [1]:
import tensorflow as tf
import numpy as np

# Contoh data numerik
data = np.array([[10.0, 20.0, 30.0],
                 [15.0, 25.0, 35.0],
                 [20.0, 30.0, 40.0]])

# Buat layer normalisasi
normalizer = tf.keras.layers.Normalization()

# Adapt layer normalisasi pada data
normalizer.adapt(data)

# Terapkan normalisasi pada data
normalized_data = normalizer(data)

# Cetak hasil normalisasi
print("Data asli:\n", data)
print("Data yang telah dinormalisasi:\n", normalized_data.numpy())


Data asli:
 [[10. 20. 30.]
 [15. 25. 35.]
 [20. 30. 40.]]
Data yang telah dinormalisasi:
 [[-1.2247449 -1.2247449 -1.2247449]
 [ 0.         0.         0.       ]
 [ 1.2247449  1.2247449  1.2247449]]
