In [None]:
"""......................................................................................................"""
# pandas is used for structured data operations such as importing CSV files, creating data frames, and preparing data.
import pandas as pd
# NumPy is a numerical library that provides a powerful N-dimensional array object, linear algebra, Fourier transforms, etc.
import numpy as np
# The SimpleImputer class from sklearn.impute is a useful tool for handling missing values in datasets.
from sklearn.impute import SimpleImputer
import warnings
# Silence all warnings for the rest of the session. The first filter already
# ignores every warning category, so the DeprecationWarning-specific filter
# that follows does not suppress anything extra.
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Lift pandas' display limits so printed frames and sequences are not truncated.
# Option names are spelled out in full: pandas also accepts unambiguous partial
# names (the original 'display.max_seq_item'), but that relies on regex matching
# and breaks as soon as another option happens to match the same fragment.
pd.set_option('display.max_columns', None)  # No column limit
pd.set_option('display.max_rows', None)  # No row limit
pd.set_option('display.max_seq_items', None)  # No item truncation for sequences (like lists)



In [None]:
# Read the reviews CSV into a data frame
file_path = '/kaggle/input/british-airways-reviews/british_airways_reviews.csv'  # Update this with the correct file path
df = pd.read_csv(file_path)

# Report the per-column missing-value counts and the size of the raw data
missing_per_column = df.isnull().sum()
print("Missing values in data frame:\n", missing_per_column)
print("shape of  Actual data", df.shape, sep="\n")

# Discard every row that lacks a value in any of these columns
required_columns = ['Route', 'Date Flown', 'Type of Traveller', 'Seat Type', 'Trip Status']
df = df.dropna(subset=required_columns)

print("shape of data after applying dropna", df.shape, sep="\n")



In [None]:
print("Missing values before imputation:\n", df.isnull().sum())

# Rating columns whose missing values are filled in below
columns_to_impute = ['Seat Comfort Rating', 'Staff Service Rating', 'Food & Beverages Rating',
                     'Inflight Entertainment Rating', 'Ground Service Rating']

# Most-frequent-value imputer; it is refitted for every traveller type
imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')

# Impute within each 'Type of Traveller' group so a missing rating is replaced
# by the most frequent rating among travellers of the same type.
# dropna() skips a missing traveller type: comparing against NaN selects no
# rows, and fitting the imputer on an empty group raises.
for traveller_type in df['Type of Traveller'].dropna().unique():
    group_mask = df['Type of Traveller'] == traveller_type
    subset = df[group_mask]

    # SimpleImputer discards columns that have no observed value at fit time,
    # so its output would have fewer columns than the assignment target and
    # the assignment would fail. Impute only the columns that this group
    # actually has data for.
    observed_columns = [col for col in columns_to_impute if subset[col].notna().any()]
    if observed_columns:
        df.loc[group_mask, observed_columns] = imputer.fit_transform(subset[observed_columns])

# Fallback: a rating that was entirely missing within some group is filled with
# the column's most frequent value over the whole data frame. Columns with no
# observed value at all are left untouched because there is nothing to learn from.
still_missing = [col for col in columns_to_impute
                 if df[col].isna().any() and df[col].notna().any()]
if still_missing:
    df[still_missing] = imputer.fit_transform(df[still_missing])

print("Missing values after imputation:\n", df.isnull().sum())





In [None]:
# describe() returns descriptive statistics about the data like mean, minimum,
# maximum, standard deviation, etc.
print("Descriptive Statistics of the Data Frame:","\n",df.describe())

# info() prints a summary of the Data Frame: column names, their datatypes,
# and non-null counts. It writes the summary itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
print("Generate the summary of the Data Frame:")
df.info()

# df.shape (an attribute, not a method) holds the total number of rows and columns
print("Total no of rows and columns Data Frame:",df.shape)



In [None]:
# Count how often each distinct value occurs in the three categorical columns
seat_type_counts, route_counts, traveller_type_counts = (
    df[column_name].value_counts()
    for column_name in ('Seat Type', 'Route', 'Type of Traveller')
)

# Print each frequency table under its own heading
count_reports = (
    ("Unique values and their counts for 'Seat Type':\n", seat_type_counts),
    ("\nUnique values and their counts for 'Route':\n", route_counts),
    ("\nUnique values and their counts for 'Type of Traveller':\n", traveller_type_counts),
)
for report_heading, report_counts in count_reports:
    print(report_heading, report_counts)