In [6]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPClassifier

# Reduce the size of the parameter grid
param_grid = {
    'hidden_layer_sizes': [(50,), (100,)],
    'activation': ['relu'],
    'solver': ['adam'],
    'alpha': [0.0001],
    'learning_rate': ['constant'],
    'batch_size': [32],
    'tol': [1e-4],
    'max_iter': [1000], 
}

# Instantiate the RandomizedSearchCV object
random_search = RandomizedSearchCV(estimator=MLPClassifier(random_state=42),
                                   param_distributions=param_grid,
                                   n_iter=5,  # Reduce the number of iterations
                                   cv=3,  # Reduce the number of cross-validation folds
                                   random_state=42,
                                   n_jobs=-1)  

# Fitting the RandomizedSearchCV object
random_search.fit(X, y)

# Evaluate the best model
best_mlp = random_search.best_estimator_
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)




Best Parameters: {'tol': 0.0001, 'solver': 'adam', 'max_iter': 1000, 'learning_rate': 'constant', 'hidden_layer_sizes': (50,), 'batch_size': 32, 'alpha': 0.0001, 'activation': 'relu'}
Best Cross-Validation Score: 0.835


In [5]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

# Load the dataset
df = pd.read_csv("weatherAUS.csv")

# Split the dataset into features (X) and labels (y)
y=df['RainToday'].to_numpy()
y=y[:600]
X=df[['MinTemp','MaxTemp']].to_numpy()
X=X[:600]
# Impute missing values with the mean of the respective feature
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)
label_imputer = SimpleImputer(strategy='most_frequent')
y = label_imputer.fit_transform(y.reshape(-1, 1))  # Reshape y to be a 2D array
# Flatten y back to 1D array
y = y.flatten()
# Encode labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize classifiers
classifiers = {
    'Support Vector Machine': SVC(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'CatBoost': CatBoostClassifier(logging_level='Silent'),
    'AdaBoost': AdaBoostClassifier(),
    'XGBoost': XGBClassifier(),
    'Naive Bayes': GaussianNB()
}

# Train classifiers and make predictions
results = {'Classifier': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': []}
for clf_name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    results['Classifier'].append(clf_name)
    results['Accuracy'].append(accuracy)
    results['Precision'].append(precision)
    results['Recall'].append(recall)
    results['F1 Score'].append(f1)

# Convert labels back to original string labels for tabulation
y_test = label_encoder.inverse_transform(y_test)

# Create a dataframe to tabulate the results
results_df = pd.DataFrame(results)

# Display the results
print(results_df)


               Classifier  Accuracy  Precision    Recall  F1 Score
0  Support Vector Machine  0.816667   0.807452  0.816667  0.810937
1           Decision Tree  0.750000   0.771307  0.750000  0.758810
2           Random Forest  0.833333   0.828993  0.833333  0.830877
3                CatBoost  0.825000   0.827499  0.825000  0.826183
4                AdaBoost  0.833333   0.828993  0.833333  0.830877
5                 XGBoost  0.800000   0.805745  0.800000  0.802628
6             Naive Bayes  0.783333   0.613611  0.783333  0.688162


  _warn_prf(average, modifier, msg_start, len(result))
