**Package Imports**

In [451]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('dark_background')
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler


**Load CSV DATA**

In [452]:
# Read the raw trip records; the path is relative to the notebook's working directory.
dataFrame = pd.read_csv(filepath_or_buffer='taxi_trip_pricing.csv')

**Load Columns**

In [None]:
# Print every column label of the freshly loaded frame, one per line.
for column in dataFrame.columns.tolist():
    print(column)

**Dropping Unnecessary Columns**

In [454]:
# Remove the three fare-component columns from the frame.
# Re-assigning the result (instead of inplace=True) together with
# errors='ignore' makes this cell safe to re-run: a second execution is a
# no-op rather than a KeyError on the already-removed labels.
dataFrame = dataFrame.drop(
    columns=['Base_Fare', 'Per_Km_Rate', 'Per_Minute_Rate'],
    errors='ignore',
)

In [None]:
# Print the column labels again to confirm which ones remain after the drop.
for column in dataFrame.columns.tolist():
    print(column)

In [None]:
# Preview the first ten rows.
dataFrame.iloc[:10]

**Encoding Ordinal Data**

In [None]:
# Integer codes for each categorical column.
Traffic_Conditions_mapper = {"Low": 0, "Medium": 1, "High": 2}
Time_of_Day_mapper = {"Morning": 0, "Afternoon": 1, "Evening": 2, "Night": 3}
Day_of_Week_mapper = {"Weekday": 0, "Weekend": 1}
Weather_mapper = {"Clear": 0, "Rain": 1, "Snow": 2}

category_mappers = {
    'Traffic_Conditions': Traffic_Conditions_mapper,
    'Time_of_Day': Time_of_Day_mapper,
    'Day_of_Week': Day_of_Week_mapper,
    'Weather': Weather_mapper,
}

for column_name, mapper in category_mappers.items():
    # mapper.get(value, value) leaves NaN and already-encoded numbers untouched,
    # so re-running this cell does not wipe the column (a plain .map(dict)
    # would turn every already-encoded value into NaN).
    encoded = dataFrame[column_name].map(lambda value, m=mapper: m.get(value, value))
    # pd.to_numeric gives an explicit numeric dtype (no reliance on the
    # deprecated silent downcasting of .replace) and raises on any label that
    # is missing from the mapper instead of letting a string slip through.
    dataFrame[column_name] = pd.to_numeric(encoded)

dataFrame.head(10)





**Number of Null Values**

In [None]:
# Count the missing entries in every column before imputation.
dataFrame.isna().sum()

**Filling Null Data**

In [459]:
# Fixed seed so the random imputation below gives the same frame on every
# Restart & Run All.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)


def fill_missing_with_random_choice(column, choices, rng):
    """Return a copy of `column` with NaN entries replaced by random picks.

    Parameters
    ----------
    column : pd.Series
        Column that may contain missing values.
    choices : sequence
        Values to draw from, uniformly and with replacement.
    rng : np.random.Generator
        Random generator used for the draws.

    Returns
    -------
    pd.Series
        Numeric series with the same index and no missing values introduced.
    """
    filled = column.copy()
    missing = filled.isna()
    if missing.any():
        # One vectorised draw for all gaps instead of one call per row.
        filled.loc[missing] = rng.choice(choices, size=int(missing.sum()))
    return pd.to_numeric(filled)


# Continuous columns: replace gaps with the column mean.
# NOTE(review): Trip_Price is the value the model predicts later in this
# notebook; mean-filling it creates synthetic labels. Consider dropping those
# rows instead - behaviour is kept as in the original here.
for column_name in ['Trip_Distance_km', 'Trip_Duration_Minutes', 'Trip_Price']:
    dataFrame[column_name] = dataFrame[column_name].fillna(dataFrame[column_name].mean())

# Discrete columns: replace gaps with a uniformly random valid code.
random_fill_choices = {
    'Traffic_Conditions': [0, 1, 2],
    'Passenger_Count': [1, 2, 3, 4],
    'Time_of_Day': [0, 1, 2, 3],
    'Day_of_Week': [0, 1],
    'Weather': [0, 1, 2],
}
for column_name, choices in random_fill_choices.items():
    dataFrame[column_name] = fill_missing_with_random_choice(
        dataFrame[column_name], choices, rng
    )



In [None]:
# Re-count missing entries to verify the imputation left no gaps.
dataFrame.isna().sum()

In [None]:
# Preview the first twenty rows after encoding and imputation.
dataFrame.iloc[:20]

**Plotting the Column Data**

In [None]:
# Line plot of every numeric column against the row index.
dataFrame.plot(kind='line')

**Correlation**

In [None]:
# Pairwise Pearson correlations between all columns.
corr_matrix = dataFrame.corr(method='pearson')
# Correlation of each column with Trip_Price, strongest positive first.
corr_matrix.loc[:, "Trip_Price"].sort_values(ascending=False)

In [None]:
# Annotated heatmap of the correlation matrix on a 10x10 inch figure.
fig, ax = plt.subplots(figsize=(10, 10))
colormap = sns.color_palette("BrBG", 10)
sns.heatmap(
    data=corr_matrix,
    annot=True,
    fmt=".2f",
    cmap=colormap,
    ax=ax,  # draw on the axes created above rather than the implicit current axes
)
plt.show()

**Value Counts**

In [None]:
# Frequency of each encoded traffic-condition value (missing values excluded).
dataFrame['Traffic_Conditions'].value_counts(dropna=True)


In [None]:
# Bar chart of the traffic-condition frequencies, one colour per value.
sns.countplot(
    data=dataFrame,
    x='Traffic_Conditions',
    hue='Traffic_Conditions',
)
plt.show()

In [None]:
# Frequency of each passenger count (missing values excluded).
dataFrame['Passenger_Count'].value_counts(dropna=True)

In [None]:
# Bar chart of the passenger-count frequencies.
sns.countplot(
    data=dataFrame,
    x='Passenger_Count',
)
plt.show()

In [None]:
# Frequency of each encoded time-of-day value (missing values excluded).
dataFrame['Time_of_Day'].value_counts(dropna=True)

In [None]:
# Bar chart of the time-of-day frequencies, one colour per value.
sns.countplot(
    data=dataFrame,
    x='Time_of_Day',
    hue='Time_of_Day',
)
plt.show()

In [None]:
# Frequency of each encoded weather value (missing values excluded).
dataFrame['Weather'].value_counts(dropna=True)

In [None]:
# Bar chart of the weather frequencies, one colour per value.
sns.countplot(
    data=dataFrame,
    x='Weather',
    hue='Weather',
)
plt.show()

In [None]:
# Frequency of each encoded day-of-week value (missing values excluded).
dataFrame['Day_of_Week'].value_counts(dropna=True)

In [None]:
# Bar chart of the day-of-week frequencies, one colour per value.
sns.countplot(
    data=dataFrame,
    x='Day_of_Week',
    hue='Day_of_Week',
)
plt.show()

In [None]:
# Summary of the frame (dtypes and non-null counts) to confirm every column is
# ready for modelling.
dataFrame.info()

**Choosing the Features and The Target Value**

In [472]:
# Select the target by name rather than by position, so a change in column
# order cannot silently turn a different column into the target.
TARGET_COLUMN = 'Trip_Price'

# Every remaining column is a model input; the original column order is kept.
features = dataFrame.drop(columns=TARGET_COLUMN)
dependentVariable = dataFrame[TARGET_COLUMN]

**Scaling the Data**

In [473]:
# Standardise every feature to zero mean and unit variance.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed in the following cell, so test-row statistics feed the scaling.
# Fit on the training rows only if a strict hold-out evaluation is required.
standardScalar = StandardScaler().fit(features)
features = standardScalar.transform(features)

In [474]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# partition reproducible.
(
    featuresTrain,
    featuresTest,
    dependentVariableTrain,
    dependentVariableTest,
) = train_test_split(
    features,
    dependentVariable,
    test_size=0.2,
    random_state=200,
)

In [None]:
# Inspect the scaled training features.
featuresTrain

In [None]:
# Inspect the scaled test features.
featuresTest

In [None]:
# Inspect the training targets.
dependentVariableTrain

In [None]:
# Inspect the held-out test targets.
dependentVariableTest

**Algorithm Used:**

In [479]:
# Fit an ordinary least-squares model on the training split (fit returns the
# estimator itself), then predict the held-out rows.
model = LinearRegression().fit(featuresTrain, dependentVariableTrain)
predections = model.predict(featuresTest)

**Model Score (R²)**

In [None]:
# Coefficient of determination of the predictions on the held-out rows.
rSquaredScore = r2_score(dependentVariableTest, predections)
# Format the number inside the format spec: round(x, 2) * 100 prints float
# artefacts such as 56.99999999999999. R^2 is a regression score, not a
# classification accuracy, so the message names it as such.
print("The R² score is {:.2f} ({:.0f}%)".format(rSquaredScore, rSquaredScore * 100))



**Results**

In [None]:
# Side-by-side table of true and predicted prices for the test rows; the
# index is the one carried by dependentVariableTest.
results = pd.DataFrame({
    "Actual": dependentVariableTest,
    "Predicted": predections,
})
# Leave the frame as the cell's last expression so Jupyter renders it as a
# rich table instead of the plain-text print() output.
results.head(20)

**Plotting Actual vs Predicted Data**

In [None]:
# Scatter of predicted against actual prices on the test split.
# matplotlib.pyplot is already imported as plt in the notebook's import cell,
# so the redundant mid-notebook import is dropped, and the explicit
# figure/axes interface is used instead of the pyplot state machine.
fig, ax = plt.subplots()
ax.scatter(dependentVariableTest, predections, alpha=1)
ax.set_xlabel("Actual Values")
ax.set_ylabel("Predicted Values")
ax.set_title("Predicted vs Actual")
plt.show()

**A User Input Code**

In [None]:
# Collect one trip description from the user, using the same encodings as the
# training data.
trip_distance = float(input("Enter the Trip Distance (in km): "))
time_of_day = float(input("Enter the Time of Day : 0 : Morning , 1 : Afternoon , 2 : Evening , 3 : Night: "))
day_of_week = int(input("Enter the Day of Week : 0 : Weekday , 1 : Weekend: "))
# Passenger_Count is never re-coded in this notebook (gaps are filled with
# values from 1-4), so ask for the real count rather than a 0-3 code.
passenger_count = int(input("Enter the Passenger Count (1 - 4): "))
traffic_conditions = float(input("Enter the Traffic Conditions , 0 : Low , 1 : Medium , 2 : High: "))
weather = float(input("Enter the Weather condition , 0 : Clear , 1 : Rain , 2 : Snow: "))
trip_duration = float(input("Enter the Trip Duration (in minutes, decimals allowed): "))

# Build a one-row frame keyed by column name so each value is matched to its
# feature by name, not by list position.
user_input = pd.DataFrame([{
    'Trip_Distance_km': trip_distance,
    'Time_of_Day': time_of_day,
    'Day_of_Week': day_of_week,
    'Passenger_Count': passenger_count,
    'Traffic_Conditions': traffic_conditions,
    'Weather': weather,
    'Trip_Duration_Minutes': trip_duration,
}])

# When the scaler was fitted on a named frame, reorder to its recorded feature
# order (this also avoids the "X does not have valid feature names" warning).
# Otherwise fall back to a plain array in the order listed above.
feature_order = getattr(standardScalar, 'feature_names_in_', None)
if feature_order is not None:
    user_input = user_input[list(feature_order)]
else:
    user_input = user_input.to_numpy()

user_input_scaled = standardScalar.transform(user_input)
prediction = model.predict(user_input_scaled)
print(f"The predicted Trip Price is: {prediction[0]}")


