In [4]:
#importing libraries
import pandas as pd

#Function to load and filter the dataset
def load_filter_data(file_path, exclude_type):
    """
    Reads a CSV file and drops the rows of one accommodation type.

    Parameters:
    file_path (str): CSV file path.
    exclude_type: Value of the 'Main Accommodation Type' column whose rows
        are removed from the result.

    Returns:
    DataFrame: Rows whose 'Main Accommodation Type' differs from
        `exclude_type`, re-indexed from 0.
    """
    # index_col=False: do not use the first column of the file as the index
    raw = pd.read_csv(file_path, index_col=False)

    # Boolean mask that is True for every row to keep
    keep_rows = raw['Main Accommodation Type'] != exclude_type

    # Select the kept rows and renumber them 0..n-1, discarding the old index
    return raw[keep_rows].reset_index(drop=True)

In [2]:
#function to calculate the interquartile range and identify outliers
def calculate_iqr(df_filtered, feature, constant=1.5):
    """
    Finds the outliers of one column using the interquartile-range (IQR) rule.

    A row is an outlier when its value in `feature` is strictly below
    Q1 - constant * IQR or strictly above Q3 + constant * IQR, where Q1 and
    Q3 are the 25th and 75th percentiles of the column and IQR = Q3 - Q1.

    Parameters:
    df_filtered - pandas DataFrame.
    feature - name of the column on which the IQR and outliers are calculated.
    constant - multiplier applied to the IQR to build the bounds (default 1.5).

    Returns:
    outliers - single-column DataFrame holding the outlying values of
               `feature`; the index labels of `df_filtered` are preserved.
    """
    values = df_filtered[feature]

    # 25th and 75th percentiles (first and third quartiles)
    Q1 = values.quantile(0.25)
    Q3 = values.quantile(0.75)
    IQR = Q3 - Q1

    # Bounds outside of which a value counts as an outlier
    lower = Q1 - constant * IQR
    upper = Q3 + constant * IQR

    outliers = df_filtered[(values < lower) | (values > upper)]

    # Return the analysed column itself; a hard-coded column name here would
    # fail or report the wrong data for any other feature.
    return outliers[[feature]]


## Graphics

In [6]:
#Function to plot a heatmap
def plot_correlation_heatmap(dataframe, variables, title='Correlation Matrix Heatmap'):
    """
    Draws an annotated heatmap of the pairwise correlations between columns.

    NOTE(review): relies on `plt` and `sns` already being defined in the
    notebook namespace (presumably matplotlib.pyplot and seaborn) -- confirm
    they are imported in an earlier cell.

    Parameters:
    - dataframe: pd.DataFrame - The DataFrame containing the data.
    - variables: list - Column names to include in the correlation matrix.
    - title: str - Title shown above the heatmap.
    """
    # Correlation matrix restricted to the requested columns
    corr = dataframe[variables].corr()

    # One 10x6 figure with a single axes to draw on
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(
        corr,
        annot=True,       # write the coefficient inside each cell
        fmt='.2f',        # two decimals
        cmap='coolwarm',
        square=True,
        ax=ax,
    )
    ax.set_title(title)
    plt.show()