In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')

# Baseline: predict the tip from the bill total, party size and day of week
# using manual preprocessing followed by ordinary least squares.
df = sns.load_dataset('tips')

feature_cols = ['total_bill', 'size', 'day']
numeric_cols = ['total_bill', 'size']

X = df[feature_cols]
Y = df['tip']

# Split BEFORE fitting any preprocessing so that statistics of the held-out
# rows (means, variances, label vocabulary) never influence the training data.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Work on explicit copies: the column assignments below must write to frames
# that are independent of `df`, not to a selection of it.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only; reuse the fitted parameters on test.
scalar = StandardScaler()
X_train[numeric_cols] = scalar.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = scalar.transform(X_test[numeric_cols])

# NOTE(review): LabelEncoder is documented as a target encoder. Used on a
# feature it turns `day` into integer codes that the linear model treats as an
# ordered numeric quantity; a one-hot encoding would avoid that assumption.
le = LabelEncoder()
X_train['day'] = le.fit_transform(X_train['day'])
X_test['day'] = le.transform(X_test['day'])

model = LinearRegression()
model.fit(X_train, Y_train)

y_pred = model.predict(X_test)

# Compute MSE once and derive RMSE from it.
mae = mean_absolute_error(Y_test, y_pred)
mse = mean_squared_error(Y_test, y_pred)

print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", np.sqrt(mse))
print("R2 Score:", r2_score(Y_test, y_pred))


Mean Absolute Error: 0.6656656656006282
Mean Squared Error: 0.6566031847646813
Root Mean Squared Error: 0.8103105483483979
R2 Score: 0.4747054104497257


In [2]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')


# Same task as a single scikit-learn Pipeline: preprocessing is fitted inside
# `fit` on the training rows only and re-applied unchanged at predict time.
df = sns.load_dataset('tips')

numeric_features = ['total_bill', 'size']
categorical_features = ['day']

X = df[numeric_features + categorical_features]
Y = df['tip']

preprocessor = ColumnTransformer(
    transformers=[
        # Standardise the numeric columns.
        ('num', StandardScaler(), numeric_features),
        # One-hot encode the day. `handle_unknown='ignore'` encodes a category
        # that was absent during fit as all zeros instead of raising at
        # predict time.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# The variable name `pipline` (sic) is kept so any other cell that refers to it
# continues to work.
pipline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', LinearRegression()),
])

pipline.fit(X_train, Y_train)

y_pred = pipline.predict(X_test)

# Compute MSE once and derive RMSE from it.
mae = mean_absolute_error(Y_test, y_pred)
mse = mean_squared_error(Y_test, y_pred)

print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", np.sqrt(mse))
print("R2 Score:", r2_score(Y_test, y_pred))



Mean Absolute Error: 0.6591957722968487
Mean Squared Error: 0.6655725405795047
Root Mean Squared Error: 0.8158262931405831
R2 Score: 0.46752976130485213
