In [52]:
from kedro.config import ConfigLoader
from kedro.io import DataCatalog
import os

# Root of the Kedro project. The relative 'conf/' path below and the relative
# 'data/...' filepaths in the catalog are resolved against the current working
# directory, so the kernel must run from the project root.
# The location can be overridden per machine with the SPACEFLIGHTS_PROJECT_ROOT
# environment variable (a variable specific to this notebook); when it is unset
# the original author's path is used, so existing behaviour is unchanged.
# Use an absolute path so that re-running this cell stays idempotent.
PROJECT_ROOT = os.environ.get(
    'SPACEFLIGHTS_PROJECT_ROOT',
    '/Users/alfredo.cinelli/Desktop/Courses/spaceflights-tutorial',
)
os.chdir(PROJECT_ROOT)

# Initialise a ConfigLoader rooted at the project's configuration directory
conf_loader = ConfigLoader('conf/')

# Load the data catalog configuration from catalog.yml (a dict keyed by dataset name)
conf_catalog = conf_loader.get('catalog.yml')

# Create the DataCatalog instance from the configuration
catalog = DataCatalog.from_config(conf_catalog)

In [53]:
conf_catalog

{'companies': {'type': 'pandas.CSVDataSet',
  'filepath': 'data/01_raw/companies.csv'},
 'reviews': {'type': 'pandas.CSVDataSet',
  'filepath': 'data/01_raw/reviews.csv'},
 'shuttles': {'type': 'pandas.ExcelDataSet',
  'filepath': 'data/01_raw/shuttles.xlsx'},
 'preprocessed_companies': {'type': 'pandas.ExcelDataSet',
  'filepath': 'data/02_intermediate/preprocessed_companies.xlsx'},
 'preprocessed_shuttles': {'type': 'pandas.ExcelDataSet',
  'filepath': 'data/02_intermediate/preprocessed_shuttles.xlsx'},
 'model_input_table': {'type': 'pandas.ExcelDataSet',
  'filepath': 'data/03_primary/model_input_table.xlsx'}}

In [66]:
import pandas as pd

def create_model_input_table(
    shuttles: pd.DataFrame, companies: pd.DataFrame, reviews: pd.DataFrame
) -> pd.DataFrame:
    """Join shuttles, reviews and companies into a single model input table.

    The table is built in three steps:

    1. inner-join ``shuttles`` to ``reviews`` on ``id == shuttle_id``;
    2. inner-join that result to ``companies`` on ``company_id == id``;
    3. drop every row that contains at least one missing value.

    Column names that occur on both sides of a join (e.g. ``id`` in the
    second join) receive pandas' default ``_x`` / ``_y`` suffixes.

    Args:
        shuttles: Shuttle data; must provide ``id`` and ``company_id``.
        companies: Company data; must provide ``id``.
        reviews: Review data; must provide ``shuttle_id``.

    Returns:
        The joined table with incomplete rows removed.
    """
    return (
        shuttles
        .merge(reviews, how="inner", left_on="id", right_on="shuttle_id")
        .merge(companies, how="inner", left_on="company_id", right_on="id")
        .dropna()
    )

In [54]:
# Load the 'preprocessed_shuttles' catalog entry (configured as a pandas.ExcelDataSet)
# and preview its first two rows.
shuttles = catalog.load('preprocessed_shuttles')
shuttles.head(2)

Unnamed: 0,id,shuttle_location,shuttle_type,engine_type,engine_vendor,engines,passenger_capacity,cancellation_policy,crew,d_check_complete,moon_clearance_complete,price,company_id
0,63561,Niue,Type V5,Quantum,ThetaBase Services,1.0,2,strict,1.0,False,False,1325,35029
1,36260,Anguilla,Type V5,Quantum,ThetaBase Services,1.0,2,strict,1.0,True,False,1780,30292


In [55]:
# Load the 'preprocessed_companies' catalog entry (configured as a pandas.ExcelDataSet)
# and preview its first two rows.
companies = catalog.load('preprocessed_companies')
companies.head(2)

Unnamed: 0,id,company_rating,company_location,total_fleet_count,iata_approved
0,35029,1.0,Niue,4.0,False
1,30292,0.67,Anguilla,6.0,False


In [56]:
# Load the raw 'reviews' catalog entry (configured as a pandas.CSVDataSet)
# and preview its first two rows.
reviews = catalog.load('reviews')
reviews.head(2)

Unnamed: 0,shuttle_id,review_scores_rating,review_scores_comfort,review_scores_amenities,review_scores_trip,review_scores_crew,review_scores_location,review_scores_price,number_of_reviews,reviews_per_month
0,63561,97.0,10.0,9.0,10.0,10.0,9.0,10.0,133,1.65
1,36260,90.0,8.0,9.0,10.0,9.0,9.0,9.0,3,0.09


In [67]:
# Combine the three loaded frames into the model input table and preview it.
# NOTE(review): the preview shows several rows for the same shuttle id that look
# identical in the displayed columns; this suggests the join keys are not unique
# in at least one input, so the inner joins multiply rows — verify before modelling.
model_input_table = create_model_input_table(shuttles, companies, reviews)
model_input_table.head()

Unnamed: 0,id_x,shuttle_location,shuttle_type,engine_type,engine_vendor,engines,passenger_capacity,cancellation_policy,crew,d_check_complete,...,review_scores_crew,review_scores_location,review_scores_price,number_of_reviews,reviews_per_month,id_y,company_rating,company_location,total_fleet_count,iata_approved
0,63561,Niue,Type V5,Quantum,ThetaBase Services,1.0,2,strict,1.0,False,...,10.0,9.0,10.0,133,1.65,35029,1.0,Niue,4.0,False
1,63561,Niue,Type V5,Quantum,ThetaBase Services,1.0,2,strict,1.0,False,...,10.0,9.0,10.0,133,1.65,35029,1.0,Niue,4.0,False
2,63561,Niue,Type V5,Quantum,ThetaBase Services,1.0,2,strict,1.0,False,...,10.0,9.0,10.0,133,1.65,35029,1.0,Niue,4.0,False
3,63561,Niue,Type V5,Quantum,ThetaBase Services,1.0,2,strict,1.0,False,...,10.0,9.0,10.0,133,1.65,35029,1.0,Niue,4.0,False
4,53260,Niue,Type V5,Quantum,"Banks, Wood and Phillips",1.0,2,strict,1.0,False,...,10.0,9.0,10.0,37,0.48,35029,1.0,Niue,4.0,False


In [71]:
# `.size` is the total number of cells (rows x columns), not the number of rows;
# inspect `.shape` to see the two dimensions separately.
model_input_table.size

21269052