# Response Item Network Analysis

### Import

In [None]:
# Import
import warnings
from importlib import reload

import pandas as pd
from pandas.errors import PerformanceWarning

# Suppress PerformanceWarnings
warnings.simplefilter(action='ignore', category=PerformanceWarning)

# Import the project modules themselves first so they can be reloaded.
import util
import responseItemNetwork

# Reload BEFORE binding the class aliases. importlib.reload() re-executes the
# module, but names that were already imported from it are not rebound, so
# aliases created before the reload would keep pointing at the stale classes
# and edits to util.py / responseItemNetwork.py would not be picked up.
reload(util)
reload(responseItemNetwork)

from util import UtilityFunctions as uf
from responseItemNetwork import ResponseItemNetwork as ResIN

# Monkey patch the helper methods from the utility class onto pandas.DataFrame
# so they can be called as df.<helper>(...).
for _helper_name in (
    'filter_features',
    'filter_numerical_values',
    'filter_negative_values',
    'filter_columns_with_less_unique_values_than_threshold',
    'drop_columns',
):
    setattr(pd.DataFrame, _helper_name, getattr(uf, _helper_name))

### Create test graph and visualise

In [None]:
import random

# Build a small synthetic data set: one row per respondent, four items
# (a, b, c, d) answered on a 1-3 scale.
num_rows = 150
data = []

for _ in range(num_rows):
    # a and b are drawn freely from the full scale.
    a, b = random.choice([1, 2, 3]), random.choice([1, 2, 3])

    # When a or b hits the maximum (3), c and d are restricted to the
    # non-maximum answers; otherwise they may take any value.
    allowed = [1, 2] if 3 in (a, b) else [1, 2, 3]
    c, d = random.choice(allowed), random.choice(allowed)

    data.append(dict(a=a, b=b, c=c, d=d))

df = pd.DataFrame(data)
print(df)

# Column -> [label, 3, False] entries handed to ResIN.
# NOTE(review): the 3 presumably is the number of answer options and the
# boolean a per-item flag -- confirm against ResponseItemNetwork.
# NOTE(review): 'b' is labelled 'c' and 'c' is labelled 'a', so labels repeat;
# verify this is intended.
mapping = {
    'a': ['a', 3, False],
    'b': ['c', 3, False],
    'c': ['a', 3, False],
    'd': ['c', 3, False]
}

# No third argument is supplied (None) for this toy example.
resin = ResIN(df, mapping, None)
resin.visualize_graph()
print(resin)


## Load Data 

In [None]:
path = './../data/processed/'
files_to_load = "2018_G"

# Read every processed CSV whose entry contains the `files_to_load` marker.
# NOTE(review): this is a substring test on whatever
# uf.get_csv_files_from_folder returns (presumably path strings), so the
# marker would also match a directory name -- confirm that is acceptable.
processed_files = uf.get_csv_files_from_folder(path)
frames = []
for f in processed_files:
    if files_to_load in f:
        print(f)
        frames.append(pd.read_csv(f))

# Concatenate once instead of growing the DataFrame inside the loop, which
# re-copies all accumulated rows on every iteration. Fall back to an empty
# DataFrame when nothing matched, since pd.concat raises on an empty list.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

## Select Features

In [None]:
# Survey items to analyse, keyed by variable code. Each value is a
# (label, number, flag) tuple; the filtered copy of this dict is later passed
# to ResIN.
# NOTE(review): the number looks like the count of answer options for the
# item and the flag is always False here -- confirm against
# ResponseItemNetwork.
question_mapping = {
    'D060': ('University Gender', 4, False),
    'D078': ('Women Executives', 4, False),
    'D059': ('Women Leaders', 4, False),

    'E069_11': ('Gov Trust', 4, False),
    'E069_12': ('Party Trust', 4, False),
    'E069_07': ('Parliament Trust', 4, False),

    'F120': ('Abortion', 10, False),
    'F118': ('Homosexuality', 10, False),
    'F119': ('Prostitution', 10, False),
    'F144_02': ('Death Penalty', 10, False),

    'G052': ('Immigration Impact', 5, False),
    'B008': ('Environment vs Growth', 3, False),
    'F034': ('Religious Person', 2, False),
    'D026_03': ('Child Duty', 5, False),
    'E018': ('Authority Respect', 3, False),

    'E035': ('Income Equality', 10, False),
    'E036': ('State Ownership', 10, False),
    'E037': ('Gov Responsibility', 10, False),
    'E039': ('Competition', 10, False)
}

# How many questions to keep, counted in dict order; currently all of them.
NUMBER_OF_QUESTIONS = len(question_mapping)
questions = list(question_mapping)[:NUMBER_OF_QUESTIONS]

# Keep the selected question columns plus E033 (used below as the
# political-belief feature).
df_filtered = df.filter_features([*questions, 'E033'])

# Restrict the mapping to the first NUMBER_OF_QUESTIONS selected questions.
question_mapping_filtered = {code: question_mapping[code] for code in questions}
print(df_filtered.columns)

## Process Features Into Binary Variables

In [None]:
# Column passed to ResIN as the political-belief feature.
political_belief_feature = 'E033'

# Build the response-item network from the filtered survey data.
resin = ResIN(df_filtered, question_mapping_filtered, political_belief_feature)

# Passing False hides the node labels in the plotted graph.
resin.visualize_graph(False)

# Report the shape and columns of the DataFrame held by the ResIN instance.
print(resin.df.shape, resin.df.columns, sep='\n')