In [18]:
import pandas as pd

class FlightDataProcessor:
    def __init__(self, df):
        """
        Initialize the FlightDataProcessor with a DataFrame.
        
        Parameters:
            df (pd.DataFrame): The input DataFrame.
        """
        self.df = df

    def extract_day_month_year(self, date_column):
        """
        Extract day, month, and year from the specified date column.
        """
        if date_column not in self.df.columns:
            raise ValueError(f"The column '{date_column}' does not exist in the DataFrame.")
        
        # Remove the time portion, if present
        self.df[date_column] = self.df[date_column].apply(lambda x: x.split(' ')[0])
        
        # Split the date and create new columns
        date_parts = self.df[date_column].str.split('/', expand=True)
        self.df['day'] = date_parts[0].astype(int)
        self.df['month'] = date_parts[1].astype(int)
        self.df['year'] = date_parts[2].astype(int)
        self.df.drop(date_column, axis=1, inplace=True)

    def extract_time(self, time_column, prefix):
        """
        Extract hour and minute from the specified time column.
        """
        if time_column not in self.df.columns:
            raise ValueError(f"The column '{time_column}' does not exist in the DataFrame.")
        
        # Remove the date part, if present
        self.df[time_column] = self.df[time_column].str.split(' ').str[-1]
        
        # Split the time and create new columns for hour and minute
        time_parts = self.df[time_column].str.split(':', expand=True)
        self.df[f'{prefix}_hour'] = time_parts[0].astype(int)
        self.df[f'{prefix}_minute'] = time_parts[1].astype(int)
        self.df.drop(time_column, axis=1, inplace=True)

    def convert_duration(self, duration_column, output_column):
        """
        Convert duration strings to total minutes and drop the original duration column.
        """
        def duration_to_minutes(duration):
            if pd.isna(duration):
                return None
            parts = duration.split()
            hours = int(parts[0][:-1]) if 'h' in parts[0] else 0
            minutes = int(parts[1][:-1]) if len(parts) > 1 and 'm' in parts[1] else 0
            return hours * 60 + minutes

        self.df[output_column] = self.df[duration_column].apply(duration_to_minutes)
        self.df.drop(duration_column, axis=1, inplace=True)

    def process_all(self, date_column, arrival_column, departure_column, duration_column):
        """
        Perform all processing steps: extract date components, extract times, and convert duration.
        """
        self.extract_day_month_year(date_column)
        self.extract_time(arrival_column, 'arrival')
        self.extract_time(departure_column, 'departure')
        self.convert_duration(duration_column, 'duration_minutes')

    def get_processed_data(self):
        """
        Return the processed DataFrame.
        """
        return self.df


In [25]:
# Sample DataFrame
data = {
    'Date_of_Journey': ['12/03/2023', '15/05/2022'],
    'Arrival_Time': ['08:45', '14:30'],
    'Dep_Time': ['06:00', '12:45'],
    'Duration': ['2h 45m', '2h']
}
df = pd.DataFrame(data)


In [28]:

# Create an instance of the processor class
processor = FlightDataProcessor(df)

# Process all data
processor.process_all(
    date_column='Date_of_Journey',
    arrival_column='Arrival_Time',
    departure_column='Dep_Time',
    duration_column='Duration'
)

# Get the processed DataFrame
processed_df = processor.get_processed_data()
processed_df


Unnamed: 0,day,month,year,arrival_hour,arrival_minute,departure_hour,departure_minute,duration_minutes
0,12,3,2023,8,45,6,0,165
1,15,5,2022,14,30,12,45,120


In [26]:
ft=FlightDataProcessor(df)

In [27]:
ft.convert_duration('Duration')

TypeError: FlightDataProcessor.convert_duration() missing 1 required positional argument: 'output_column'

In [None]:
('Duration')

ValueError: invalid literal for int() with base 10: '2h'

In [10]:
df

Unnamed: 0,Arrival_Time,Dep_Time,Duration,day,month,year
0,08:45,06:00,2h 45m,12,3,2023
1,14:30,12:45,2h,15,5,2022
