![Imgur](https://i.imgur.com/VryXoBw.png)

## ①　Introduction
This notebook contains information about an avid golf player (Player A) who has been playing golf regularly for years. (Of course, not every day!)

🎯　Your goal is to predict whether the value of the column `Play_A` is 0 or 1.

You are free to add/remove sections and make any modifications to this notebook. Only your final submission will be graded. This notebook will not be graded.

In this notebook, you will mainly work on Section ④ (Preprocessing) and Section ⑧ (Model Building).

## ②　Setting Up the Environment
These are all the libraries used in the lecture.

In [None]:
# Basic Libraries (L1)
import pandas as pd
import numpy  as np
import warnings
warnings.filterwarnings('ignore')

# Data Preprocessing (L2)
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler

# Data Exploration (L3)
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm, lognorm, expon, uniform, gamma
from scipy.stats import probplot, chi2_contingency

# Basic Classifiers & Regressors (L4-5)
from sklearn.linear_model import LogisticRegression,     LinearRegression, Lasso, Ridge
from sklearn.naive_bayes  import BernoulliNB,            GaussianNB
from sklearn.neighbors    import KNeighborsClassifier,   KNeighborsRegressor
from sklearn.dummy        import DummyClassifier,        DummyRegressor
from sklearn.tree         import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.svm          import SVC,                    SVR

# Ensemble Classifiers & Regressors (L6)
from sklearn.ensemble     import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble     import RandomForestClassifier,     RandomForestRegressor
from sklearn.ensemble     import StackingClassifier,         StackingRegressor
from sklearn.ensemble     import AdaBoostClassifier,         AdaBoostRegressor
from sklearn.ensemble     import BaggingClassifier,          BaggingRegressor
from sklearn.ensemble     import VotingClassifier,           VotingRegressor
from catboost             import CatBoostClassifier,         CatBoostRegressor
import lightgbm as lgb
import xgboost  as xgb

# Classification & Regression Metrics (L7)
from sklearn.metrics      import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics      import confusion_matrix
from sklearn.metrics      import roc_curve, roc_auc_score
from sklearn.metrics      import mean_squared_error, mean_absolute_error
from sklearn.metrics      import median_absolute_error, mean_squared_log_error
from sklearn.metrics      import r2_score

# Model Calibration (L7)
from sklearn.calibration  import calibration_curve
from sklearn.metrics      import brier_score_loss, log_loss

# Model Selection & Validation (L7)
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold, LeaveOneOut

# Hyperparameter Optimization (L7)
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from skopt                   import BayesSearchCV
from skopt.space             import Real, Categorical, Integer

# Multiclass Classification (L8)
from sklearn.multiclass      import OneVsOneClassifier,    OneVsRestClassifier
from sklearn.metrics         import classification_report, precision_recall_fscore_support

# Model Weighting (L8)
from sklearn.utils.class_weight import compute_sample_weight

# Resampling Techniques (L8)
from imblearn.over_sampling  import RandomOverSampler,  SMOTE,    ADASYN
from imblearn.under_sampling import RandomUnderSampler, NearMiss, TomekLinks
from imblearn.combine        import SMOTEENN

## ③　Data Loading
Note that the target values of the test data are missing (NaN). They are hidden from you, and your job is to predict them.

In [None]:
# Read the train and test CSV files, using the first column of each as the index.
train_url = "/kaggle/input/prediction-of-golf-play-based-on-weather-condition/train.csv"
test_url  = "/kaggle/input/prediction-of-golf-play-based-on-weather-condition/test.csv"

df_train, df_test = (
    pd.read_csv(csv_path, index_col=0) for csv_path in (train_url, test_url)
)

# Stack train on top of test (row-wise) so both are preprocessed together.
df = pd.concat([df_train, df_test])

# Display the combined frame as the cell output.
df

## ④　Combined Preprocessing

In [None]:
# Preprocessing of the combined (train + test) frame goes here.
# Starter step: one-hot encode the categorical columns via pandas' default dummy encoding.
df = pd.get_dummies(data=df)

## ⑤　Exploratory Data Analysis

In [None]:
# Exploratory data analysis (optional; this cell can be skipped).

# Summary statistics as reported by DataFrame.describe()
overview = df.describe()
print(overview)

# Histograms of the columns on a 12x10 inch canvas
df.hist(figsize=(12, 10))
plt.show()

# Count of rows per value of the target column `Play_A`
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='Play_A', ax=ax)
plt.show()

## ⑦　Data Splitting for Modeling
Training data: Rows where `Play_A` is not missing.
Testing data: Rows where `Play_A` is missing.

In [None]:
# Undo the earlier concatenation: rows with a known `Play_A` are training data,
# rows where `Play_A` is NaN are the test rows to be predicted.
has_target = df['Play_A'].notna()
df_train = df[has_target]
df_test = df[~has_target]

# Separate the features from the target column.
y_train = df_train['Play_A']
X_train = df_train.drop(columns='Play_A')
X_test = df_test.drop(columns='Play_A')

## ⑧　Model Building
Perform your model building, evaluation, validation, and hyperparameter tuning here.

In [None]:
# Define the classifier: a voting ensemble (default hard / majority voting)
# over six different base estimators.
# random_state is pinned on the estimators that accept one here so that the
# cross-validation score and the final predictions are reproducible between runs.
# NOTE(review): KNN, logistic regression and SVC receive the features unscaled;
# consider scaling in Section ④ if feature ranges differ a lot.
clf = VotingClassifier(estimators=[
                                ('knn', KNeighborsClassifier(n_neighbors=3)),
                                ('dt', DecisionTreeClassifier(max_depth=3, random_state=42)),
                                ('lr', LogisticRegression(C=0.1)),
                                ('brn', BernoulliNB()),
                                ('svc', SVC()),
                                ('ada', AdaBoostClassifier(random_state=42))
                              ])

# Fit on the full training set; this fitted model is the one used for prediction later.
clf.fit(X_train, y_train)

# Perform 5-fold cross-validation to check performance (precision per fold).
# cross_val_score fits clones of `clf`, so the model fitted above is left untouched.
scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='precision')

# Report mean precision ± two standard deviations across the folds.
# The spread is doubled first and rounded only when formatting, so no precision
# is lost and both numbers always show three decimals.
print(f"Precision: {scores.mean():.3f} ± {2 * scores.std():.3f}")

## ⑨　Making Predictions on Test Data 

In [None]:
# Predict `Play_A` for the unlabeled test rows with the fitted ensemble.
y_pred = clf.predict(X=X_test)

## ⑩　Submitting the Prediction

In [None]:
# Make DataFrame for Submission (one row per test index, single column `Play_A`).
# `Play_A` holds NaN for the test rows after train and test were concatenated,
# which presumably upcasts the column to float, so the model predicts 0.0 / 1.0.
# Cast to int so the file contains the 0 / 1 labels the task asks for.
submission = pd.DataFrame(
    {'Play_A': np.asarray(y_pred).astype(int)},
    index=df_test.index,
)

# Make a CSV file for Submission
submission.to_csv('submission.csv')

# Display the submission as the cell output.
submission