In [None]:
import os
import pandas as pd
import numpy as np
import datetime
import time

In [None]:
# Load the output of the previous cleaning step.
# NOTE(review): this is read from the working directory, while the result of
# this notebook is written under ../Data — confirm the input location.
renfe = pd.read_csv(filepath_or_buffer="renfe_clean_1.csv")

In [None]:
# Preview the first three rows of the loaded frame.
renfe.head(n=3)

### Keeping only the number of days from the calculated `days_to_departure` column

In [None]:
# Keep only the leading day count of each entry and store it as a number.
# NOTE(review): assumes values look like "<days> days ..." — confirm against
# the contents of renfe_clean_1.csv.
# A fixed two-character slice leaves a trailing space on single-digit counts
# ("5 ") and truncates counts with three or more digits, so the leading integer
# is extracted with a regex instead. `astype(str)` keeps the cell re-runnable
# once the column has already been converted to numbers.
renfe["days_to_departure"] = pd.to_numeric(
    renfe["days_to_departure"].astype(str).str.extract(r"^\s*(-?\d+)", expand=False)
)

### Reducing train type categories into 3 big categories

In [None]:
# Raw train types grouped by the coarse category they belong to.
high_speed = [
    "AVE",
    "AVE-TGV",
]
long_distance = [
    "INTERCITY",
    "ALVIA",
    "TRENHOTEL",
    "AV City",
]
regional = [
    "MD",
    "LD",
    "EXPRESS",
    "REGIONAL",
]

In [None]:
def train_class(x):
    """Map a raw train type onto one of three coarse categories.

    Returns "High_speed" when ``x`` is listed in ``high_speed``,
    "Long_distance" when it is listed in ``long_distance``, and
    "Regional" for every other value (the ``regional`` list itself is
    not consulted).
    """
    if x in high_speed:
        return "High_speed"
    return "Long_distance" if x in long_distance else "Regional"

In [None]:
# Derive the coarse train category from each row's raw train type.
renfe["train_category"] = renfe["train_type"].map(train_class)

### Reducing ticket classes into 2 big categories

In [None]:
# Label every row whose ticket class starts with the given pattern.
# Rows that match none of the patterns are left without a category.
for prefix, category in (
    ("Turista", "Economy"),
    ("Preferente", "First_class"),
    ("Clase", "First_class"),
):
    prefix_mask = renfe["ticket_class"].str.match(prefix)
    renfe.loc[prefix_mask, "ticket_class_category"] = category

### Reducing fare types into 2 big categories

In [None]:
# Fare names that `fare` reports as "Fixed"; any other fare is "Flexible".
fixed = [
    "Promo",
    "Promo +",
    "Adulto",
    "Mesa",
    "Grupos Ida",
]

In [None]:
def fare(x):
    """Return "Fixed" when ``x`` is listed in ``fixed``, otherwise "Flexible"."""
    return "Fixed" if x in fixed else "Flexible"

In [None]:
# Collapse each raw fare name into its Fixed / Flexible category.
renfe["fare_category"] = renfe["fare"].map(fare)

### Creating categories for time variables

In [None]:
# Parse the "HH:MM:SS" strings of both time columns into time.struct_time values.
timing = renfe["price_check_time"].map(
    lambda clock: time.strptime(clock, "%H:%M:%S")
)
timing_2 = renfe["departure_time"].map(
    lambda clock: time.strptime(clock, "%H:%M:%S")
)

In [None]:
# Position 3 of a time.struct_time is the hour field (tm_hour).
renfe["price_check_hour"] = timing.map(lambda parsed: parsed[3])
renfe["departure_hour"] = timing_2.map(lambda parsed: parsed[3])

In [None]:
def time_window(t):
    """Return the name of the four-hour window that contains hour ``t``.

    Windows (lower bound inclusive, upper bound exclusive):

    * 2-6   -> "Deep night"
    * 6-10  -> "Morning"
    * 10-14 -> "Late_morning"
    * 14-18 -> "Afternoon"
    * 18-22 -> "Evening"
    * anything else (22-24 and 0-2) -> "Night"

    The first window previously used a strict ``t > 2``, which pushed hour 2
    into "Night" and left "Deep night" with only three hours, unlike every
    other window.
    """
    if 2 <= t < 6:
        return "Deep night"
    if 6 <= t < 10:
        return "Morning"
    if 10 <= t < 14:
        return "Late_morning"
    if 14 <= t < 18:
        return "Afternoon"
    if 18 <= t < 22:
        return "Evening"
    # Wraps around midnight: hours 22, 23, 0 and 1 (and any non-comparable value such as NaN).
    return "Night"

In [None]:
# Bucket both hour columns into named time-of-day windows.
renfe["price_check_time_window"] = renfe["price_check_hour"].map(time_window)
renfe["departure_time_window"] = renfe["departure_hour"].map(time_window)

In [None]:
# How many price checks fall into each time window.
renfe["price_check_time_window"].value_counts()

### Last categorization adjustments

In [None]:
# Day names treated as weekdays; `day` labels every other value "weekend".
# "Tuesday" was previously misspelled "Tueday", which made every Tuesday
# departure fall through to "weekend".
weekday = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]

In [None]:
def day(x):
    """Return "weekday" when ``x`` is listed in ``weekday``, otherwise "weekend"."""
    return "weekday" if x in weekday else "weekend"

In [None]:
# Flag each departure as falling on a weekday or on the weekend.
renfe["departure_day"] = renfe["departure_weekday"].map(day)

In [None]:
# Remove the two raw columns now that their category columns have been added.
renfe.drop(columns=["ticket_class", "fare"], inplace=True)

In [None]:
# Preview the first three rows after all categorisation steps.
renfe.head(n=3)

In [None]:
# Write the categorised frame to disk, leaving out the index column.
renfe.to_csv(path_or_buf="../Data/renfe_clean_2.csv", index=False)