In [1]:
# data reading, writing, exploration, cleaning
import pandas as pd
# array creation and operation
import numpy as np 
# plotting library
import matplotlib.pyplot as plt 
# data visualization library
import seaborn as sns
# remove warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the raw feedback export into `df` and display it.
# NOTE(review): the path is an absolute, machine-specific location.
df = pd.read_csv(
    filepath_or_buffer="C:/Users/Dell/OneDrive/Desktop/Data Engineer Project/Project-1/feedback_dataset.csv"
)
df

Unnamed: 0,feedback_id,order_id,feedback_score,feedback_form_sent_date,feedback_answer_date
0,7bc2406110b926393aa56f80a40eba40,73fc7af87114b39712e6da79b0a377eb,4,2018-01-18 00:00:00,2018-01-18 21:46:59
1,80e641a11e56f04c1ad469d5645fdfde,a548910a1c6147796b98fdf73dbeba33,5,2018-03-10 00:00:00,2018-03-11 03:05:13
2,228ce5500dc1d8e020d8d1322874b6f0,f9e4b658b201a9f2ecdecbb34bed034b,5,2018-02-17 00:00:00,2018-02-18 14:36:24
3,e64fb393e7b32834bb789ff8bb30750e,658677c97b385a9be170737859d3511b,5,2017-04-21 00:00:00,2017-04-21 22:02:06
4,f7c4243c7fe1938f181bec41a392bdeb,8e6bfb81e283fa7e4f11123a3fb894f1,5,2018-03-01 00:00:00,2018-03-02 10:26:53
...,...,...,...,...,...
99995,f3897127253a9592a73be9bdfdf4ed7a,22ec9f0669f784db00fa86d035cf8602,5,2017-12-09 00:00:00,2017-12-11 20:06:42
99996,b3de70c89b1510c4cd3d0649fd302472,55d4004744368f5571d1f590031933e4,5,2018-03-22 00:00:00,2018-03-23 09:10:43
99997,1adeb9d84d72fe4e337617733eb85149,7725825d039fc1f0ceb7635e3f7d9206,4,2018-07-01 00:00:00,2018-07-02 12:59:13
99998,be360f18f5df1e0541061c87021e6d93,f8bd3f2000c28c5342fedeb5e50f2e75,1,2017-12-15 00:00:00,2017-12-16 01:29:43


In [3]:
# Number of missing values in each column
df.isna().sum()

feedback_id                0
order_id                   0
feedback_score             0
feedback_form_sent_date    0
feedback_answer_date       0
dtype: int64

In [4]:
def transform_datetime_column(df, column_name, prefix):
    """Expand a timestamp column into date, time, calendar and time-of-day columns.

    ``df[column_name]`` is parsed with ``pd.to_datetime`` (and overwritten with
    the parsed values), then these columns are added, each named
    ``f'{prefix}_<suffix>'``: ``Date``, ``Time``, ``Year``, ``Month``, ``Day``,
    ``Season``, ``Hour``, ``Minute``, ``Second``, ``AM/PM`` and ``TimeOfDay``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column_name``. It is modified in place.
    column_name : str
        Name of the column holding the timestamps.
    prefix : str
        Prefix used for every generated column name.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, with the new columns added.

    Notes
    -----
    Season mapping by month: 12/1/2 -> 'Winter', 3/4/5 -> 'Spring',
    6/7/8 -> 'Summer', anything else -> 'Fall'.
    Time-of-day mapping by hour: [5, 12) -> 'Morning', [12, 17) -> 'Afternoon',
    [17, 21) -> 'Evening', anything else -> 'Night'.
    Rows whose timestamp is missing keep a missing ``Season`` and ``TimeOfDay``
    instead of being labelled by the fall-through branches ('Fall' / 'Night').
    """
    # Convert the column to datetime
    df[column_name] = pd.to_datetime(df[column_name])
    stamps = df[column_name].dt

    # Split Date and Time
    df[f'{prefix}_Date'] = stamps.date
    df[f'{prefix}_Time'] = stamps.time

    # Extract Year, Month and Day
    df[f'{prefix}_Year'] = stamps.year
    df[f'{prefix}_Month'] = stamps.month
    df[f'{prefix}_Day'] = stamps.day

    def get_season(month):
        """Return the season label for a month number."""
        if month in (12, 1, 2):
            return 'Winter'
        if month in (3, 4, 5):
            return 'Spring'
        if month in (6, 7, 8):
            return 'Summer'
        return 'Fall'

    # na_action='ignore' leaves missing months missing rather than passing NaN
    # to get_season, where it would match no list and come back as 'Fall'.
    df[f'{prefix}_Season'] = df[f'{prefix}_Month'].map(get_season, na_action='ignore')

    # Extract Hour, Minute, Second and AM/PM
    df[f'{prefix}_Hour'] = stamps.hour
    df[f'{prefix}_Minute'] = stamps.minute
    df[f'{prefix}_Second'] = stamps.second
    df[f'{prefix}_AM/PM'] = stamps.strftime('%p')

    def get_period(hour):
        """Return the time-of-day label for an hour in 0-23."""
        if 5 <= hour < 12:
            return 'Morning'
        if 12 <= hour < 17:
            return 'Afternoon'
        if 17 <= hour < 21:
            return 'Evening'
        return 'Night'

    # Same reasoning as for the season: a missing hour fails every range check
    # and would otherwise be reported as 'Night'.
    df[f'{prefix}_TimeOfDay'] = df[f'{prefix}_Hour'].map(get_period, na_action='ignore')

    return df


In [5]:
# Expand feedback_answer_date into the Feedback_Answer_* columns
df = transform_datetime_column(df, 'feedback_answer_date', 'Feedback_Answer')

In [6]:
# Display the frame to check the newly added Feedback_Answer_* columns
df

Unnamed: 0,feedback_id,order_id,feedback_score,feedback_form_sent_date,feedback_answer_date,Feedback_Answer_Date,Feedback_Answer_Time,Feedback_Answer_Year,Feedback_Answer_Month,Feedback_Answer_Day,Feedback_Answer_Season,Feedback_Answer_Hour,Feedback_Answer_Minute,Feedback_Answer_Second,Feedback_Answer_AM/PM,Feedback_Answer_TimeOfDay
0,7bc2406110b926393aa56f80a40eba40,73fc7af87114b39712e6da79b0a377eb,4,2018-01-18 00:00:00,2018-01-18 21:46:59,2018-01-18,21:46:59,2018,1,18,Winter,21,46,59,PM,Night
1,80e641a11e56f04c1ad469d5645fdfde,a548910a1c6147796b98fdf73dbeba33,5,2018-03-10 00:00:00,2018-03-11 03:05:13,2018-03-11,03:05:13,2018,3,11,Spring,3,5,13,AM,Night
2,228ce5500dc1d8e020d8d1322874b6f0,f9e4b658b201a9f2ecdecbb34bed034b,5,2018-02-17 00:00:00,2018-02-18 14:36:24,2018-02-18,14:36:24,2018,2,18,Winter,14,36,24,PM,Afternoon
3,e64fb393e7b32834bb789ff8bb30750e,658677c97b385a9be170737859d3511b,5,2017-04-21 00:00:00,2017-04-21 22:02:06,2017-04-21,22:02:06,2017,4,21,Spring,22,2,6,PM,Night
4,f7c4243c7fe1938f181bec41a392bdeb,8e6bfb81e283fa7e4f11123a3fb894f1,5,2018-03-01 00:00:00,2018-03-02 10:26:53,2018-03-02,10:26:53,2018,3,2,Spring,10,26,53,AM,Morning
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,f3897127253a9592a73be9bdfdf4ed7a,22ec9f0669f784db00fa86d035cf8602,5,2017-12-09 00:00:00,2017-12-11 20:06:42,2017-12-11,20:06:42,2017,12,11,Winter,20,6,42,PM,Evening
99996,b3de70c89b1510c4cd3d0649fd302472,55d4004744368f5571d1f590031933e4,5,2018-03-22 00:00:00,2018-03-23 09:10:43,2018-03-23,09:10:43,2018,3,23,Spring,9,10,43,AM,Morning
99997,1adeb9d84d72fe4e337617733eb85149,7725825d039fc1f0ceb7635e3f7d9206,4,2018-07-01 00:00:00,2018-07-02 12:59:13,2018-07-02,12:59:13,2018,7,2,Summer,12,59,13,PM,Afternoon
99998,be360f18f5df1e0541061c87021e6d93,f8bd3f2000c28c5342fedeb5e50f2e75,1,2017-12-15 00:00:00,2017-12-16 01:29:43,2017-12-16,01:29:43,2017,12,16,Winter,1,29,43,AM,Night


In [7]:
# Function to extract Year, Month, Day, and Season
def extract_date_components(df, column_name, prefix):
    """Add year, month, day and season columns derived from a date column.

    ``df[column_name]`` is parsed with ``pd.to_datetime`` (and overwritten with
    the parsed values), then ``f'{prefix}_Year'``, ``f'{prefix}_Month'``,
    ``f'{prefix}_Day'`` and ``f'{prefix}_Season'`` are added.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column_name``. It is modified in place.
    column_name : str
        Name of the column holding the dates.
    prefix : str
        Prefix used for every generated column name.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, with the new columns added.

    Notes
    -----
    Season mapping by month: 12/1/2 -> 'Winter', 3/4/5 -> 'Spring',
    6/7/8 -> 'Summer', anything else -> 'Fall'. Rows whose date is missing
    keep a missing ``Season`` instead of being labelled 'Fall'.
    """
    # Convert the column to datetime
    df[column_name] = pd.to_datetime(df[column_name])
    dates = df[column_name].dt

    # Extract Year, Month, and Day
    df[f'{prefix}_Year'] = dates.year
    df[f'{prefix}_Month'] = dates.month
    df[f'{prefix}_Day'] = dates.day

    def get_season(month):
        """Return the season label for a month number."""
        if month in (12, 1, 2):
            return 'Winter'
        if month in (3, 4, 5):
            return 'Spring'
        if month in (6, 7, 8):
            return 'Summer'
        return 'Fall'

    # na_action='ignore' leaves missing months missing rather than passing NaN
    # to get_season, where it would match no list and come back as 'Fall'.
    df[f'{prefix}_Season'] = df[f'{prefix}_Month'].map(get_season, na_action='ignore')

    return df

In [8]:
# Add year/month/day/season columns derived from feedback_form_sent_date
df = extract_date_components(df, 'feedback_form_sent_date', 'feedback_form_sent')

In [9]:
# Display the frame to check the newly added feedback_form_sent_* columns
df

Unnamed: 0,feedback_id,order_id,feedback_score,feedback_form_sent_date,feedback_answer_date,Feedback_Answer_Date,Feedback_Answer_Time,Feedback_Answer_Year,Feedback_Answer_Month,Feedback_Answer_Day,Feedback_Answer_Season,Feedback_Answer_Hour,Feedback_Answer_Minute,Feedback_Answer_Second,Feedback_Answer_AM/PM,Feedback_Answer_TimeOfDay,feedback_form_sent_Year,feedback_form_sent_Month,feedback_form_sent_Day,feedback_form_sent_Season
0,7bc2406110b926393aa56f80a40eba40,73fc7af87114b39712e6da79b0a377eb,4,2018-01-18,2018-01-18 21:46:59,2018-01-18,21:46:59,2018,1,18,Winter,21,46,59,PM,Night,2018,1,18,Winter
1,80e641a11e56f04c1ad469d5645fdfde,a548910a1c6147796b98fdf73dbeba33,5,2018-03-10,2018-03-11 03:05:13,2018-03-11,03:05:13,2018,3,11,Spring,3,5,13,AM,Night,2018,3,10,Spring
2,228ce5500dc1d8e020d8d1322874b6f0,f9e4b658b201a9f2ecdecbb34bed034b,5,2018-02-17,2018-02-18 14:36:24,2018-02-18,14:36:24,2018,2,18,Winter,14,36,24,PM,Afternoon,2018,2,17,Winter
3,e64fb393e7b32834bb789ff8bb30750e,658677c97b385a9be170737859d3511b,5,2017-04-21,2017-04-21 22:02:06,2017-04-21,22:02:06,2017,4,21,Spring,22,2,6,PM,Night,2017,4,21,Spring
4,f7c4243c7fe1938f181bec41a392bdeb,8e6bfb81e283fa7e4f11123a3fb894f1,5,2018-03-01,2018-03-02 10:26:53,2018-03-02,10:26:53,2018,3,2,Spring,10,26,53,AM,Morning,2018,3,1,Spring
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,f3897127253a9592a73be9bdfdf4ed7a,22ec9f0669f784db00fa86d035cf8602,5,2017-12-09,2017-12-11 20:06:42,2017-12-11,20:06:42,2017,12,11,Winter,20,6,42,PM,Evening,2017,12,9,Winter
99996,b3de70c89b1510c4cd3d0649fd302472,55d4004744368f5571d1f590031933e4,5,2018-03-22,2018-03-23 09:10:43,2018-03-23,09:10:43,2018,3,23,Spring,9,10,43,AM,Morning,2018,3,22,Spring
99997,1adeb9d84d72fe4e337617733eb85149,7725825d039fc1f0ceb7635e3f7d9206,4,2018-07-01,2018-07-02 12:59:13,2018-07-02,12:59:13,2018,7,2,Summer,12,59,13,PM,Afternoon,2018,7,1,Summer
99998,be360f18f5df1e0541061c87021e6d93,f8bd3f2000c28c5342fedeb5e50f2e75,1,2017-12-15,2017-12-16 01:29:43,2017-12-16,01:29:43,2017,12,16,Winter,1,29,43,AM,Night,2017,12,15,Winter


In [10]:
# Drop the original answer timestamp column from the frame
df = df.drop(columns=['feedback_answer_date'])

In [11]:
# Display the frame to confirm feedback_answer_date has been removed
df

Unnamed: 0,feedback_id,order_id,feedback_score,feedback_form_sent_date,Feedback_Answer_Date,Feedback_Answer_Time,Feedback_Answer_Year,Feedback_Answer_Month,Feedback_Answer_Day,Feedback_Answer_Season,Feedback_Answer_Hour,Feedback_Answer_Minute,Feedback_Answer_Second,Feedback_Answer_AM/PM,Feedback_Answer_TimeOfDay,feedback_form_sent_Year,feedback_form_sent_Month,feedback_form_sent_Day,feedback_form_sent_Season
0,7bc2406110b926393aa56f80a40eba40,73fc7af87114b39712e6da79b0a377eb,4,2018-01-18,2018-01-18,21:46:59,2018,1,18,Winter,21,46,59,PM,Night,2018,1,18,Winter
1,80e641a11e56f04c1ad469d5645fdfde,a548910a1c6147796b98fdf73dbeba33,5,2018-03-10,2018-03-11,03:05:13,2018,3,11,Spring,3,5,13,AM,Night,2018,3,10,Spring
2,228ce5500dc1d8e020d8d1322874b6f0,f9e4b658b201a9f2ecdecbb34bed034b,5,2018-02-17,2018-02-18,14:36:24,2018,2,18,Winter,14,36,24,PM,Afternoon,2018,2,17,Winter
3,e64fb393e7b32834bb789ff8bb30750e,658677c97b385a9be170737859d3511b,5,2017-04-21,2017-04-21,22:02:06,2017,4,21,Spring,22,2,6,PM,Night,2017,4,21,Spring
4,f7c4243c7fe1938f181bec41a392bdeb,8e6bfb81e283fa7e4f11123a3fb894f1,5,2018-03-01,2018-03-02,10:26:53,2018,3,2,Spring,10,26,53,AM,Morning,2018,3,1,Spring
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,f3897127253a9592a73be9bdfdf4ed7a,22ec9f0669f784db00fa86d035cf8602,5,2017-12-09,2017-12-11,20:06:42,2017,12,11,Winter,20,6,42,PM,Evening,2017,12,9,Winter
99996,b3de70c89b1510c4cd3d0649fd302472,55d4004744368f5571d1f590031933e4,5,2018-03-22,2018-03-23,09:10:43,2018,3,23,Spring,9,10,43,AM,Morning,2018,3,22,Spring
99997,1adeb9d84d72fe4e337617733eb85149,7725825d039fc1f0ceb7635e3f7d9206,4,2018-07-01,2018-07-02,12:59:13,2018,7,2,Summer,12,59,13,PM,Afternoon,2018,7,1,Summer
99998,be360f18f5df1e0541061c87021e6d93,f8bd3f2000c28c5342fedeb5e50f2e75,1,2017-12-15,2017-12-16,01:29:43,2017,12,16,Winter,1,29,43,AM,Night,2017,12,15,Winter


In [12]:
def categorize_score(score):
    """Translate a numeric feedback score into a descriptive label.

    Parameters
    ----------
    score : number
        The feedback score to classify.

    Returns
    -------
    str or None
        'Excellent' when the score equals 5, 'Good' for other scores of at
        least 4, 'Average' for at least 3, 'Below Average' for at least 2 and
        'Poor' for anything lower. ``None`` when the score is missing.
    """
    # A missing score has no category. Without this guard NaN fails every
    # comparison below and would be reported as 'Poor'.
    if pd.isna(score):
        return None
    if score == 5:
        return 'Excellent'
    if score >= 4:
        return 'Good'
    if score >= 3:
        return 'Average'
    if score >= 2:
        return 'Below Average'
    return 'Poor'

# Label every feedback_score with its category
df['feedback_score_category'] = df['feedback_score'].map(categorize_score)


In [13]:
# Display the frame to check the new feedback_score_category column
df

Unnamed: 0,feedback_id,order_id,feedback_score,feedback_form_sent_date,Feedback_Answer_Date,Feedback_Answer_Time,Feedback_Answer_Year,Feedback_Answer_Month,Feedback_Answer_Day,Feedback_Answer_Season,Feedback_Answer_Hour,Feedback_Answer_Minute,Feedback_Answer_Second,Feedback_Answer_AM/PM,Feedback_Answer_TimeOfDay,feedback_form_sent_Year,feedback_form_sent_Month,feedback_form_sent_Day,feedback_form_sent_Season,feedback_score_category
0,7bc2406110b926393aa56f80a40eba40,73fc7af87114b39712e6da79b0a377eb,4,2018-01-18,2018-01-18,21:46:59,2018,1,18,Winter,21,46,59,PM,Night,2018,1,18,Winter,Good
1,80e641a11e56f04c1ad469d5645fdfde,a548910a1c6147796b98fdf73dbeba33,5,2018-03-10,2018-03-11,03:05:13,2018,3,11,Spring,3,5,13,AM,Night,2018,3,10,Spring,Excellent
2,228ce5500dc1d8e020d8d1322874b6f0,f9e4b658b201a9f2ecdecbb34bed034b,5,2018-02-17,2018-02-18,14:36:24,2018,2,18,Winter,14,36,24,PM,Afternoon,2018,2,17,Winter,Excellent
3,e64fb393e7b32834bb789ff8bb30750e,658677c97b385a9be170737859d3511b,5,2017-04-21,2017-04-21,22:02:06,2017,4,21,Spring,22,2,6,PM,Night,2017,4,21,Spring,Excellent
4,f7c4243c7fe1938f181bec41a392bdeb,8e6bfb81e283fa7e4f11123a3fb894f1,5,2018-03-01,2018-03-02,10:26:53,2018,3,2,Spring,10,26,53,AM,Morning,2018,3,1,Spring,Excellent
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,f3897127253a9592a73be9bdfdf4ed7a,22ec9f0669f784db00fa86d035cf8602,5,2017-12-09,2017-12-11,20:06:42,2017,12,11,Winter,20,6,42,PM,Evening,2017,12,9,Winter,Excellent
99996,b3de70c89b1510c4cd3d0649fd302472,55d4004744368f5571d1f590031933e4,5,2018-03-22,2018-03-23,09:10:43,2018,3,23,Spring,9,10,43,AM,Morning,2018,3,22,Spring,Excellent
99997,1adeb9d84d72fe4e337617733eb85149,7725825d039fc1f0ceb7635e3f7d9206,4,2018-07-01,2018-07-02,12:59:13,2018,7,2,Summer,12,59,13,PM,Afternoon,2018,7,1,Summer,Good
99998,be360f18f5df1e0541061c87021e6d93,f8bd3f2000c28c5342fedeb5e50f2e75,1,2017-12-15,2017-12-16,01:29:43,2017,12,16,Winter,1,29,43,AM,Night,2017,12,15,Winter,Poor


In [288]:
# Write the transformed frame to CSV without the index column.
# NOTE(review): this cell's execution count (288) is out of sequence with the
# cells around it — confirm the notebook reproduces under Restart & Run All.
df.to_csv(
    path_or_buf='C:/Users/Dell/OneDrive/Desktop/Data Engineer Project/Project-1/Original dataset/feedback.csv',
    index=False,
)

In [14]:
# List the column names of the final frame
df.columns

Index(['feedback_id', 'order_id', 'feedback_score', 'feedback_form_sent_date',
       'Feedback_Answer_Date', 'Feedback_Answer_Time', 'Feedback_Answer_Year',
       'Feedback_Answer_Month', 'Feedback_Answer_Day',
       'Feedback_Answer_Season', 'Feedback_Answer_Hour',
       'Feedback_Answer_Minute', 'Feedback_Answer_Second',
       'Feedback_Answer_AM/PM', 'Feedback_Answer_TimeOfDay',
       'feedback_form_sent_Year', 'feedback_form_sent_Month',
       'feedback_form_sent_Day', 'feedback_form_sent_Season',
       'feedback_score_category'],
      dtype='object')