# In [None]:
# 🧭 Step-by-Step Process
# 1. Load the Data
import seaborn as sns

# Fetch seaborn's example "tips" dataset and preview its first five rows.
df = sns.load_dataset(name='tips')
print(df.head(n=5))

# | Index | Total Bill | Tip  | Sex    | Smoker | Day | Time   | Size |
# | ----- | ---------- | ---- | ------ | ------ | --- | ------ | ---- |
# | 0     | 16.99      | 1.01 | Female | No     | Sun | Dinner | 2    |
# | 1     | 10.34      | 1.66 | Male   | No     | Sun | Dinner | 3    |
# | 2     | 21.01      | 3.50 | Male   | No     | Sun | Dinner | 3    |
# | 3     | 23.68      | 3.31 | Male   | No     | Sun | Dinner | 2    |
# | 4     | 24.59      | 3.61 | Female | No     | Sun | Dinner | 4    |

# ✅ Explanation: We load the "tips" dataset, which contains the columns
# total_bill, tip, sex, smoker, day, time, and size.

# 2. Define Independent & Dependent Features
# Dependent feature (target): the bill amount we want to predict.
y = df["total_bill"]
# Independent features: every column that remains after removing total_bill.
X = df.drop(columns='total_bill')
# ✅ Explanation:
# •	X: All features except total_bill
# •	y: Target variable we want to predict (total bill)

# 3. Train-Test Split
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    random_state=10,
    test_size=0.25,
)
# ✅ Explanation:
# Split the dataset so we train on one part and evaluate on another, unseen part; this reveals overfitting.
# Splitting before fitting any encoders also prevents data leakage from the test set.

# Inspect how many rows fall into each category of every categorical column.
# The counts are printed explicitly: a bare expression is only echoed when it
# is the last statement of a notebook cell, and never when run as a script,
# so the original bare `value_counts()` calls produced no output.
for column in ('sex', 'smoker', 'day', 'time'):
    print(df[column].value_counts())

# Expected output:
# sex
# Male      157
# Female     87
# Name: count, dtype: int64

# smoker
# No     151
# Yes     93
# Name: count, dtype: int64

# day
# Sat     87
# Sun     76
# Thur    62
# Fri     19
# Name: count, dtype: int64

# time
# Dinner    176
# Lunch      68
# Name: count, dtype: int64



# 4. Label Encoding for Binary Categorical Columns
from sklearn.preprocessing import LabelEncoder

# 3 binary category
# One encoder per binary column. The fitted encoders are kept in their own
# variables so the identical mapping can be re-applied to the test split.
# LabelEncoder orders the class labels before numbering them, so the
# alphabetically first label becomes 0.
le_sex = LabelEncoder()
le_smoker = LabelEncoder()
le_time = LabelEncoder()

# 'sex':    'Female' -> 0, 'Male' -> 1
X_train['sex'] = le_sex.fit_transform(X_train['sex'])
# 'smoker': 'No' -> 0, 'Yes' -> 1
X_train['smoker'] = le_smoker.fit_transform(X_train['smoker'])
# 'time':   'Dinner' -> 0, 'Lunch' -> 1
X_train['time'] = le_time.fit_transform(X_train['time'])

# Print the preview explicitly; a bare `X_train.head()` in the middle of a
# cell (or in a script) is evaluated and then silently discarded.
print(X_train.head())
# | Index | tip  | sex | smoker | day | time | size |
# | ----- | ---- | --- | ------ | --- | ---- | ---- |
# | 58    | 1.76 | 1   | 1      | Sat | 0    | 2    |
# | 1     | 1.66 | 1   | 0      | Sun | 0    | 3    |
# | 2     | 3.50 | 1   | 0      | Sun | 0    | 3    |
# | 68    | 2.01 | 1   | 0      | Sat | 0    | 2    |
# | 184   | 3.00 | 1   | 1      | Sun | 0    | 2    |

# Now same transformation done with test dataset
# Re-use the encoders fitted on the training split (transform only, no refit)
# so both splits share exactly the same label -> integer mapping.
X_test['sex'] = le_sex.transform(X_test['sex'])
X_test['smoker'] = le_smoker.transform(X_test['smoker'])
X_test['time'] = le_time.transform(X_test['time'])

# Print the preview explicitly; a bare `X_test.head()` in the middle of a
# cell (or in a script) is evaluated and then silently discarded.
print(X_test.head())
# | Index | tip  | sex | smoker | day | time | size |
# | ----- | ---- | --- | ------ | --- | ---- | ---- |
# | 162   | 2.00 | 0   | 0      | Sun | 0    | 3    |
# | 60    | 3.21 | 1   | 1      | Sat | 0    | 2    |
# | 61    | 2.00 | 1   | 1      | Sat | 0    | 2    |
# | 63    | 3.76 | 1   | 1      | Sat | 0    | 4    |
# | 69    | 2.09 | 1   | 1      | Sat | 0    | 2    |

# ✅ Explanation:
# •	For binary features like sex, smoker, and time, we use Label Encoding (convert to 0/1).
# •	Apply .fit_transform() on training data and .transform() on test data to avoid leakage.

# The 'day' column has more than two categories: 'Thur', 'Fri', 'Sat' and 'Sun'.
# Since machine learning models can't handle text directly, we convert them into numeric dummy columns.

# For example (OneHotEncoder sorts the categories, and drop='first' removes the first one, 'Fri'):
# 'Fri'  → [0, 0, 0]
# 'Sat'  → [1, 0, 0]
# 'Sun'  → [0, 1, 0]
# 'Thur' → [0, 0, 1]
# 5. One Hot Encoding for 'day' Feature

#ColumnTransformer applies transformations to specific columns ('day' in this case).
# 'onehot' → name of the transformer.
# OneHotEncoder(drop='first') → converts 'day' into multiple binary columns and drops the first category to avoid multicollinearity.
# ['day'] → apply only to the 'day' column.
# remainder='passthrough' → keep all other columns (like tip, sex, smoker, etc.) as it is.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Send only the 'day' column through a one-hot encoder and pass every other
# column through unchanged. drop='first' omits one dummy column to avoid
# multicollinearity.
ct = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(drop='first'), ['day'])],
    remainder='passthrough',
)

# Learn the 'day' categories from the training split only, then apply the
# already-fitted transformer to the test split.
X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)

# Print the transformed splits explicitly; the original bare `X_train` /
# `X_test` expressions were evaluated and silently discarded.
print(X_train)
print(X_test)

# 6. Apply Support Vector Regression (SVR)

# We use SVR when we want a powerful and flexible regression model that can handle non-linearity,
#  outliers, and small datasets effectively.
from sklearn.svm import SVR

# Fit an SVR with scikit-learn's default hyperparameters on the encoded
# training features (fit() returns the fitted estimator itself), then
# predict the total bill for the held-out rows.
svr = SVR().fit(X_train, y_train)
y_pred = svr.predict(X_test)
# ✅ Explanation:
# Train the SVR model on the processed training data and predict on the test data.

# 7. Evaluate the Model

from sklearn.metrics import r2_score, mean_absolute_error

# Report both metrics for the baseline model, comparing the held-out targets
# against the baseline predictions.
for label, metric in (("R2 Score:", r2_score), ("MAE:", mean_absolute_error)):
    print(label, metric(y_test, y_pred))
# ✅ Explanation:
# •	R2 Score: Measures how well the model predicts (closer to 1 is better)
# •	MAE: Average of errors (lower is better)

# 8. Hyperparameter Tuning using GridSearchCV
from sklearn.model_selection import GridSearchCV

# Candidate values for each SVR hyperparameter; the search tries every
# combination of them.
param_grid = dict(
    kernel=['linear', 'rbf', 'poly'],
    C=[0.1, 1, 10],
    epsilon=[0.01, 0.1, 1],
)

# Exhaustive search with 5-fold cross-validation on the training split.
# refit=True retrains the best combination on the full training split, so
# `grid` itself can be used for prediction afterwards.
grid = GridSearchCV(estimator=SVR(), param_grid=param_grid, refit=True, cv=5)
grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)
# ✅ Explanation:
# Try different SVR parameters (kernel, C, epsilon) using GridSearchCV to find the best model.

# 9. Predict Using Best Model
# Predict with the refitted best estimator found by the grid search and
# score it against the held-out targets.
grid_preds = grid.predict(X_test)

print("R2 Score:", r2_score(y_true=y_test, y_pred=grid_preds))
print("MAE:", mean_absolute_error(y_true=y_test, y_pred=grid_preds))

# R2 Score: 0.46028114561159283
# MAE: 4.1486423210190235
# Best Parameters: {'C': 10, 'epsilon': 1, 'kernel': 'linear'}
# R2 Score: 0.5354352825562995
# MAE: 3.9299196504238987






# Recorded notebook output:
#    total_bill   tip     sex smoker  day    time  size
# 0       16.99  1.01  Female     No  Sun  Dinner     2
# 1       10.34  1.66    Male     No  Sun  Dinner     3
# 2       21.01  3.50    Male     No  Sun  Dinner     3
# 3       23.68  3.31    Male     No  Sun  Dinner     2
# 4       24.59  3.61  Female     No  Sun  Dinner     4
# R2 Score: 0.46028114561159283
# MAE: 4.1486423210190235
# Best Parameters: {'C': 10, 'epsilon': 1, 'kernel': 'linear'}
# R2 Score: 0.5354352825562995
# MAE: 3.9299196504238987
