In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from typing import List
%matplotlib inline

In [2]:
# step 1 - load the three input workbooks from ./test_data, in this order:
# dimensions, cutoffs, weights.
# `weigths_matrix` keeps its original spelling because later cells refer to it by that name.
dimensions_matrix, cutoff_matrix, weigths_matrix = map(
    pd.read_excel,
    (
        './test_data/Dimensions_Matrix.xlsx',
        './test_data/cutoff_data.xlsx',
        './test_data/Weights.xlsx',
    ),
)

In [21]:
def get_deprivation_matrix(dimensions_matrix: pd.DataFrame, cutoff_matrix: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate the deprivation matrix (g0) based on dimensions and cutoff values.

    Every cell of the result is 1 when the matching cell of
    ``dimensions_matrix`` equals the cutoff of its column, and 0 otherwise.
    Cutoffs are paired with columns by position: the first entry of
    ``cutoff_matrix['Cutoff']`` belongs to the first column, the second entry
    to the second column, and so on, whatever index ``cutoff_matrix`` carries.

    Args:
        dimensions_matrix (pd.DataFrame): DataFrame containing dimensions data.
            It is not modified.
        cutoff_matrix (pd.DataFrame): DataFrame with a 'Cutoff' column holding
            one cutoff per column of ``dimensions_matrix``, in column order.

    Returns:
        pd.DataFrame: Deprivation matrix (g0) with the same index and columns
        as ``dimensions_matrix`` and integer 0/1 values.

    Raises:
        KeyError: If ``cutoff_matrix`` has no 'Cutoff' column.
        ValueError: If there are fewer cutoffs than dimension columns.
    """
    cutoffs = cutoff_matrix['Cutoff']
    column_names = dimensions_matrix.columns

    if len(cutoffs) < len(column_names):
        raise ValueError(
            f"cutoff_matrix provides {len(cutoffs)} cutoff(s) but "
            f"dimensions_matrix has {len(column_names)} column(s)"
        )

    # Work on a copy so the caller's dimensions data stays untouched
    deprivation_matrix = dimensions_matrix.copy()

    for position, col_name in enumerate(column_names):
        # Positional lookup: a label lookup with the loop counter only works
        # when cutoff_matrix happens to have a default 0..n-1 index.
        cutoff_value = cutoffs.iloc[position]

        # NOTE(review): a value is flagged only when it is exactly equal to
        # the cutoff. A "cutoff" usually implies a threshold comparison
        # (e.g. value < cutoff) -- confirm that equality is intended.
        is_deprived = dimensions_matrix[col_name] == cutoff_value
        deprivation_matrix[col_name] = is_deprived.astype('int64')

    return deprivation_matrix

In [22]:
def calculate_weighted_deprivation_matrix(deprivation_matrix: pd.DataFrame, weights_matrix: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate the weighted deprivation matrix based on the deprivation matrix and weights.

    Each column of ``deprivation_matrix`` is multiplied by one weight. Weights
    are paired with columns by position: the first entry of
    ``weights_matrix['Weight']`` scales the first column, and so on, whatever
    index ``weights_matrix`` carries.

    Args:
        deprivation_matrix (pd.DataFrame): Deprivation matrix (g0). It is not
            modified; the result is a new DataFrame.
        weights_matrix (pd.DataFrame): DataFrame with a 'Weight' column holding
            one weight per column of ``deprivation_matrix``, in column order.

    Returns:
        pd.DataFrame: Weighted deprivation matrix with the same index and
        columns as ``deprivation_matrix``.

    Raises:
        KeyError: If ``weights_matrix`` has no 'Weight' column.
        ValueError: If there are fewer weights than deprivation columns.
    """
    weights = weights_matrix['Weight']
    column_names = deprivation_matrix.columns

    if len(weights) < len(column_names):
        raise ValueError(
            f"weights_matrix provides {len(weights)} weight(s) but "
            f"deprivation_matrix has {len(column_names)} column(s)"
        )

    # Copy first: scaling the argument in place would silently change the
    # caller's deprivation matrix.
    weighted_matrix = deprivation_matrix.copy()

    for position, col_name in enumerate(column_names):
        # Positional lookup so a non-default index on weights_matrix is fine
        weighted_matrix[col_name] = deprivation_matrix[col_name] * weights.iloc[position]

    return weighted_matrix

In [23]:

def calculate_deprevation_scores(weighted_deprivation_matrix: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate deprivation scores based on the weighted deprivation matrix.

    The score of a row is the sum of all values in that row.

    Args:
        weighted_deprivation_matrix (pd.DataFrame): DataFrame containing weighted deprivation values.

    Returns:
        pd.DataFrame: DataFrame with a single 'Scores' column, one score per
        input row in the same order. The result always has a fresh 0..n-1
        index; the input's index is not carried over.
    """
    # One vectorised row-wise sum. Writing float sums one by one into a
    # pre-filled integer column is an incompatible-dtype assignment, which
    # recent pandas versions deprecate; here the dtype follows the data.
    row_totals = weighted_deprivation_matrix.sum(axis=1)

    # to_numpy() drops the input index so the scores are numbered from 0
    return pd.DataFrame({'Scores': row_totals.to_numpy()})

In [24]:
# Chain the steps: deprivation matrix -> weighted matrix -> one score per row.
df = calculate_weighted_deprivation_matrix(
    get_deprivation_matrix(dimensions_matrix, cutoff_matrix),
    weigths_matrix,
)
scores = calculate_deprevation_scores(df)


Unnamed: 0,Scores
0,1
1,3
2,0
3,2
4,2


In [32]:

def get_censored_vector(deprevation_scores_vector: pd.DataFrame, cutoff_score: float) -> pd.DataFrame:
    """
    Generate a censored vector based on deprivation scores and a cutoff score.

    Scores greater than or equal to ``cutoff_score`` are kept as they are;
    every other score (including missing values) is replaced by 0.

    Args:
        deprevation_scores_vector (pd.DataFrame): DataFrame containing
            deprivation scores in a 'Scores' column. It is not modified and
            may carry any index.
        cutoff_score (float): Cutoff score used to censor deprivation scores.
            Integer cutoffs work as before.

    Returns:
        pd.DataFrame: Censored vector of deprivation scores, with the same
        index and columns as the input.

    Raises:
        KeyError: If the input has no 'Scores' column.
    """
    # Create a copy so the caller's scores stay untouched
    censored_vector = deprevation_scores_vector.copy()

    # Vectorised censoring. Besides being shorter than a row loop, this does
    # not depend on the frame having a default 0..n-1 index.
    scores = censored_vector['Scores']
    censored_vector['Scores'] = scores.where(scores >= cutoff_score, 0)

    return censored_vector

In [33]:
# Rebuild the scores from the loaded inputs, then censor them with a cutoff of 3.
df = get_deprivation_matrix(dimensions_matrix, cutoff_matrix)
df = calculate_weighted_deprivation_matrix(df, weigths_matrix)
scores = calculate_deprevation_scores(df)
# The function is defined in all-lowercase; the previous spelling
# `get_Censored_vector` matches no definition visible in this notebook and
# would raise NameError on a fresh kernel.
censored = get_censored_vector(scores, 3)
censored.head(2)

Unnamed: 0,Scores
0,0
1,3
