In [27]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the airline flights dataset (the CSV is expected in the working directory).
data = pd.read_csv("airlines_flights_data.csv")

print("First 5 rows of dataset:")
print(data.head())

# Encode categorical variables as integer codes.
# A fresh LabelEncoder is fitted for each column and stored in `encoders`, so
# every column's code <-> category mapping remains available afterwards (e.g.
# encoders["airline"].inverse_transform(...)). Re-fitting one shared encoder
# would silently discard all mappings except the last column's.
# NOTE(review): scikit-learn documents LabelEncoder for target labels; the
# integer codes impose an arbitrary order on nominal features, which a linear
# model interprets numerically. Consider OneHotEncoder/OrdinalEncoder for X.
categorical_cols = ["airline", "flight", "source_city", "departure_time",
                    "stops", "arrival_time", "destination_city", "class"]
encoders = {}
for col in categorical_cols:
    # `le` is rebound each iteration; after the loop it refers to the encoder
    # fitted on the last column ("class"), same as before this change.
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    encoders[col] = le

print("\nAfter Encoding:")
print(data.head())

# Define features (X) and target (y).
# "price" is the target; "index" is dropped as a non-informative column.
X = data.drop(columns=["price", "index"])
y = data["price"]

# Hold out 20% of the rows for testing; fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

# Fit an ordinary least-squares linear regression on the training split.
lrr = LinearRegression()
lrr.fit(X_train, y_train)

# Predict prices for the held-out rows.
y_pred = lrr.predict(X_test)

# Evaluate on the test split: mean squared error and R^2.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Linear Regression MSE:", mse)
print("Linear Regression R2:", r2)


First 5 rows of dataset:
   index   airline   flight source_city departure_time stops   arrival_time  \
0      0  SpiceJet  SG-8709       Delhi        Evening  zero          Night   
1      1  SpiceJet  SG-8157       Delhi  Early_Morning  zero        Morning   
2      2   AirAsia   I5-764       Delhi  Early_Morning  zero  Early_Morning   
3      3   Vistara   UK-995       Delhi        Morning  zero      Afternoon   
4      4   Vistara   UK-963       Delhi        Morning  zero        Morning   

  destination_city    class  duration  days_left  price  
0           Mumbai  Economy      2.17          1   5953  
1           Mumbai  Economy      2.33          1   5953  
2           Mumbai  Economy      2.17          1   5956  
3           Mumbai  Economy      2.25          1   5955  
4           Mumbai  Economy      2.33          1   5955  

After Encoding:
   index  airline  flight  source_city  departure_time  stops  arrival_time  \
0      0        4    1408            2               2  