# IMPORT DEPENDENCIES
1. Pandas
2. Math

In [3]:
import pandas as pd
import math

# CODE FROM PACKAGE

In [5]:
class Categories:
    """
    One answer option (category) of a feature, i.e. one of the choices offered
    by a question.

    Attributes
    ----------
    name : str
        label of the category
    index : int
        position used to identify the category; it keeps the ordering of the
        categories for ordinal features
    prob : float
        occurrence probability of the category in a particular population
        (initialised to 0)
    entropy : float
        entropy of the category in a particular population,
        entropy = -p * log(p) with p the occurrence probability
        (initialised to 0)
    """

    def __init__(self, name, index):
        """
        Create a category whose statistics are still zero.

        Parameters
        ----------
        name : str
            label of the category
        index : int
            position used to identify the category; it keeps the ordering of
            the categories for ordinal features
        """
        self.name, self.index = name, index
        # Population statistics are filled in later; both start at zero.
        self.prob, self.entropy = 0, 0

In [6]:
def distance_between_categories(features):
    """
    Fill in the category-to-category distance table of every feature.

    For an ordinal feature the distance from a category to a later one is
    built up cumulatively: distance(i, j) = distance(i, j-1) +
    weight * entropy(j), with distance(i, i) = 0. For any other feature type
    the distance is weight * (entropy(j) + entropy(i)). Every value is stored
    in both directions.

    Parameters
    ----------
    features : dictionary
        maps feature name to a feature object. Each object is read for its
        ``category_order``, ``categories``, ``feat_type``, ``num_cat`` and
        ``weight`` attributes and written through its ``distances``
        dictionary. The loops assume ``category_order`` is keyed by the
        integers 0 .. num_cat - 1.

    Returns
    -------
    features : dictionary
        the same dictionary, with ``distances[cat1][cat2]`` populated
    """

    # Pass 1: every category gets a fresh, empty row. This must be complete
    # before pass 2, which writes the mirrored entry into rows of later
    # categories.
    for feature in features.values():
        for cat_name in feature.category_order.values():
            feature.distances[cat_name] = dict()

    # Pass 2: fill the rows.
    for feature in features.values():
        order = feature.category_order
        table = feature.distances
        cats = feature.categories

        for i in order.keys():
            here = order[i]
            table[here][here] = 0
            ordinal = feature.feat_type == 'ordinal'

            for j in range(i + 1, feature.num_cat):
                there = order[j]
                if ordinal:
                    # Extend the running total that ended at the previous
                    # category in the order.
                    step = feature.weight * cats[there].entropy
                    value = table[here][order[j - 1]] + step
                else:
                    value = feature.weight * (cats[there].entropy + cats[here].entropy)

                table[here][there] = value
                table[there][here] = value

    return features
    

def distance_between_individuals(data, features):
    """
    Calculates the pairwise distance between the individuals of a population.

    The distance between two individuals is the square root of the sum, over
    all columns, of the squared category distance between their two answers.
    A column only contributes when both answers are strings, so missing or
    non-string cells are ignored.

    Parameters
    ----------
    data : pandas DataFrame
        the dataset of the population, one row per individual, in the format:
                 | Feedback  |  Color  |   Hobby
        ------------------------------------------
        Person A |   Poor    |   Red   |  Football

    features : dictionary
        a dictionary of features with keys as feature name (matching the
        column names of ``data``) and values as feature objects whose
        ``distances[cat1][cat2]`` table is already populated.

    Returns
    -------
    dist_mat : pandas DataFrame
        square matrix of pairwise distances; rows and columns follow the row
        order of ``data`` and carry the default integer labels.

    Raises
    ------
    KeyError
        if a column holding string answers has no entry in ``features``, or
        an answer is missing from that feature's distance table.
    """

    columns = list(data.columns)
    # Read the rows once, by position. Label-based cell lookups are slow and,
    # when index labels repeat, return a Series instead of a scalar, which
    # made every term silently drop out of the sum.
    records = list(data.itertuples(index=False, name=None))

    dist_mat = []
    for first in records:
        dist_row = []
        for second in records:
            squared = 0
            for col, left, right in zip(columns, first, second):
                if isinstance(left, str) and isinstance(right, str):
                    squared += features[col].distances[left][right] ** 2
            dist_row.append(math.sqrt(squared))
        dist_mat.append(dist_row)

    return pd.DataFrame(dist_mat)

In [7]:
def probability(df, features):
    """
    Calculates the occurrence probability of all the categories for every
    feature.

    The probability of a category is its relative frequency among the
    non-missing answers in the feature's column. Categories that do not occur
    in ``df`` are set to 0, so running the function on a new population does
    not leave values from an earlier one behind.

    Parameters
    ----------
    df : pandas DataFrame
        the dataset of the population; must contain one column per key of
        ``features``

    features : dictionary
        a dictionary of features with keys as feature name and values as
        feature objects exposing a ``categories`` dictionary.

    Returns
    -------
    features : dictionary
        the same dictionary, with the ``prob`` of every category updated.

    Raises
    ------
    KeyError
        if a feature has no column in ``df``, or a column contains a value
        that is not a declared category of the feature.
    """

    for name, feature in features.items():
        # Count first: if the column is missing, nothing has been modified.
        frequencies = df[name].value_counts(normalize=True, sort=False)

        # Clear values left over from a previous population.
        for category in feature.categories.values():
            category.prob = 0

        for cat_name, freq in frequencies.items():
            feature.categories[cat_name].prob = freq

    return features

def entropy(features):
    """
    Calculates the entropy of all the categories for every feature.

    The entropy of a category is -p * log(p), with p its occurrence
    probability and log the natural logarithm. A category with probability 0
    gets entropy 0, so a value computed for an earlier population is not
    carried over.

    Parameters
    ----------
    features : dictionary
        a dictionary of features with keys as feature name and values as
        feature objects exposing a ``categories`` dictionary whose values
        carry ``prob``.

    Returns
    -------
    features : dictionary
        the same dictionary, with the ``entropy`` of every category updated.
    """

    for feature in features.values():
        for category in feature.categories.values():
            p = category.prob
            # log(0) is undefined; a category that never occurs contributes 0.
            category.entropy = (-1) * p * math.log(p) if p else 0

    return features


def std_entropy(features):
    """
    Set the standard entropy of every feature to log(num_cat).

    Features whose ``num_cat`` is falsy (for example 0) are left untouched.

    Parameters
    ----------
    features : dictionary
        a dictionary of features with keys as feature name and values as
        feature objects exposing ``num_cat``.

    Returns
    -------
    features : dictionary
        the same dictionary, with ``std_entropy`` updated where applicable.
    """

    for feature in features.values():
        if not feature.num_cat:
            # No categories: nothing to take the logarithm of.
            continue
        feature.std_entropy = math.log(feature.num_cat)

    return features

def feature_entropy(features):
    """
    Set the entropy of every feature to the sum of its categories' entropies.

    Parameters
    ----------
    features : dictionary
        a dictionary of features with keys as feature name and values as
        feature objects exposing a ``categories`` dictionary whose values
        carry ``entropy``.

    Returns
    -------
    features : dictionary
        the same dictionary, with the ``entropy`` of every feature updated.
    """

    for feature in features.values():
        feature.entropy = sum(
            category.entropy for category in feature.categories.values()
        )

    return features

def reliability(features):
    """
    Calculates the reliability of all the features.

    The reliability of a feature is entropy / std_entropy. A feature whose
    standard entropy is zero cannot be normalised; its reliability is set to
    0 and a warning is logged.

    Parameters
    ----------
    features : dictionary
        a dictionary of features with keys as feature name and values as
        feature objects exposing ``entropy`` and ``std_entropy``.

    Returns
    -------
    features : dictionary
        the same dictionary, with the ``reliability`` of every feature
        updated.
    """
    import logging

    logger = logging.getLogger(__name__)

    for name, feature in features.items():
        # Test the divisor explicitly instead of catching ZeroDivisionError:
        # a numpy float divided by zero yields nan/inf without raising, and
        # that value would then spread into every weight.
        if not feature.std_entropy:
            logger.warning(
                "Feature %r has zero standard entropy; reliability set to 0",
                name,
            )
            feature.reliability = 0
            continue

        feature.reliability = feature.entropy / feature.std_entropy

    return features

def total_reliability(features):
    """
    Add up the reliability of all the features.

    Parameters
    ----------
    features : dictionary
        a dictionary of features with keys as feature name and values as
        feature objects exposing ``reliability``.

    Returns
    -------
    total_rel : float
        the sum of the reliabilities (0 for an empty dictionary)
    """

    return sum(feature.reliability for feature in features.values())

def weights(features, total_reliability):
    """
    Set the weight of every feature to its share of the total reliability.

    Parameters
    ----------
    features : dictionary
        a dictionary of features with keys as feature name and values as
        feature objects exposing ``reliability``.
    total_reliability : float
        the total reliability of the features; used as the divisor

    Returns
    -------
    features : dictionary
        the same dictionary, with the ``weight`` of every feature updated.
    """

    for feature in features.values():
        feature.weight = feature.reliability / total_reliability

    return features

In [8]:
class Features:
    """
    A class used to represent a feature. A feature represents a question
    asked in the questionnaire.

    Attributes
    ----------
    name : str
        the name of the feature
    feat_type : str
        the type of feature: ordinal or nominal
    num_cat : int
        the number of categories for a feature in the questionnaire
    categories : dictionary
        a dictionary with keys as category name and values as objects of
        Categories class
    entropy : float
        the entropy of a feature in a particular population, given by
        entropy of feature = sum(entropy of categories)
    std_entropy : float
        the standard entropy of a feature given by log(num_cat)
    reliability : float
        the reliability of a feature given by entropy/ std_entropy
    weight : float
        the weight of a feature given by reliability/ total_reliability
    distances : dictionary
        the dictionary of distances between categories of a feature.
        distances[cat1][cat2] represents distance between cat1 and cat2
    distance : dictionary
        alias of ``distances`` (the very same dictionary object), kept for
        code written against the earlier attribute name
    category_order : dictionary
        the dictionary from category index to category name
    """

    def __init__(self, name, feat_type):
        """
        Parameters
        ----------
        name : str
            the name of the feature
        feat_type : str
            the type of feature: ordinal or nominal
        """

        self.name = name
        self.feat_type = feat_type
        self.num_cat = 0
        self.categories = dict()
        self.entropy = 0
        self.std_entropy = 0
        self.reliability = 0
        self.weight = 0
        # The distance functions read and write `distances`; without this
        # attribute they fail with AttributeError.
        self.distances = dict()
        self.distance = self.distances
        self.category_order = dict()

In [9]:
def read_nom(data):
    """
    Reads the nominal data from the data file and instantiates the Features
    and Categories classes.

    Parameters
    ----------
    data : str
        The file location of the spreadsheet containing nominal categories.
        The file has no header row; the first column is the feature name and
        the remaining non-empty cells of a row are its categories:
        Feature  | Category1 | Category2 | Category3.........
        ----------------------------------------------------
        Color    |    Red    |    Blue   |  Green
                 '           '           '
                 '           '           '

    Returns
    -------
    features : dictionary
        a dictionary of features with keys as feature name and values as
        objects of Features class, each of type 'nominal'.

    Raises
    ------
    Any error raised by ``pandas.read_csv`` (for example FileNotFoundError)
    is passed on unchanged.
    """

    # No try/except here: the previous `except ImportError` never matched a
    # read failure, and had it matched, the frame would have been undefined
    # on the next line.
    nominal_categories = pd.read_csv(data, header=None, index_col=0)

    features = dict()

    for feat, row in nominal_categories.iterrows():
        feat_instance = Features(feat, 'nominal')

        # Shorter rows are padded with NaN by the reader; drop the padding.
        cat_names = row.dropna().tolist()
        feat_instance.num_cat = len(cat_names)

        for _index, _name in enumerate(cat_names):
            feat_instance.categories[_name] = Categories(_name, _index)
            feat_instance.category_order[_index] = _name

        features[feat] = feat_instance

    return features

In [10]:
def read_ord(data):
    """
    Reads the ordinal data from the data file and instantiates the Features
    and Categories classes.

    Parameters
    ----------
    data : str
        The file location of the spreadsheet containing ordinal categories.
        The file has no header row; the first column is the feature name and
        the remaining non-empty cells of a row are its categories, listed in
        their order:
        Feature  | Category1 | Category2 | Category3.........
        ----------------------------------------------------
        Feedback |   Poor    |    Good   |  Excellent
                 '           '           '
                 '           '           '

    Returns
    -------
    features : dictionary
        a dictionary of features with keys as feature name and values as
        objects of Features class, each of type 'ordinal'. The index of each
        category is its position in the row.

    Raises
    ------
    Any error raised by ``pandas.read_csv`` (for example FileNotFoundError)
    is passed on unchanged.
    """

    # No try/except here: the previous `except ImportError` never matched a
    # read failure, and had it matched, the frame would have been undefined
    # on the next line.
    ordinal_categories = pd.read_csv(data, header=None, index_col=0)

    features = dict()

    for feat, row in ordinal_categories.iterrows():
        feat_instance = Features(feat, 'ordinal')

        # Shorter rows are padded with NaN by the reader; drop the padding.
        cat_names = row.dropna().tolist()
        feat_instance.num_cat = len(cat_names)

        for _index, _name in enumerate(cat_names):
            feat_instance.categories[_name] = Categories(_name, _index)
            feat_instance.category_order[_index] = _name

        features[feat] = feat_instance

    return features

# PREPROCESSING

Scratch code used while preparing the test data. Do not rely on it!

https://www.kaggle.com/xiaowenlimarketing/international-student-time-management

In [15]:
# Load the survey responses; the first CSV column becomes the row index.
df = pd.read_csv('International students Time management data.csv', index_col=0)

In [16]:
# Notebook display of the loaded frame.
df

Unnamed: 0_level_0,Age,Gender,Nationality,Program,Course,English,Academic,Attendance,"You often feel that your life is aimless, with no definite purpose",You never have trouble organizing the things you have to do?,"Once you've started an activity, you persist at it until you've completed it",Sometimes you feel that the things you have to do during the day just don't seem to matter,You will plan your activities from day to day.,You tend to leave things to the last minute?,You tend to change rather aimlessly from one activity to another during the day.,You give up the things that you planning to do just because your friend says no.,You think you do enough with your time.,You are easy to get bored with your day-today activities.,The important interests/activities in your life tend to change frequently.,You know how much time you spend on each of the homework I do.
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,31-35,M,Korea,PM,Social Sciences and Humanities,60%~70%,,S0,Disagree,Agree,Strong Agree,Neither,Agree,Neither,Disagree,Strong Disagree,Strong Agree,Neither,Disagree,Agree
2,26-30,M,China,PM,Science and engineering,60%~70%,50%~59%,S3,Strong Agree,Agree,Neither,Disagree,Agree,Neither,Disagree,Strong Disagree,Neither,Agree,Neither,Disagree
3,26-30,M,Kenya,PM,Business,60%~70%,,S0,Disagree,Strong Agree,Agree,Disagree,Agree,Agree,Disagree,Strong Disagree,Disagree,Strong Agree,Strong Agree,Disagree
4,21-25,M,Vietnam,PM,Law/Legal studies,60%~70%,60%~70%,S0,Disagree,Disagree,Agree,Agree,Disagree,Agree,Neither,Neither,Neither,Neither,Disagree,Agree
5,21-25,M,China,PM,Business,60%~70%,50%~59%,S1,Neither,Disagree,Neither,Neither,Disagree,Neither,Neither,Neither,Neither,Disagree,Neither,Agree
6,21-25,M,China,PM,Law/Legal studies,60%~70%,60%~70%,S0,Agree,Neither,Agree,Strong Agree,Disagree,Disagree,Strong Agree,Agree,Strong Agree,Strong Agree,Strong Disagree,Strong Agree
7,21-25,M,China,PM,Art and Design,60%~70%,60%~70%,S0,Disagree,Agree,Agree,Neither,Agree,Neither,Neither,Strong Disagree,Disagree,Strong Disagree,Strong Disagree,Neither
8,21-25,M,China,PM,Business,60%~70%,,S0,Agree,Neither,Neither,Agree,Agree,Neither,Agree,Neither,Agree,Agree,Agree,Agree
9,21-25,M,China,PM,Computing,60%~70%,40%~49%,S0,Disagree,Disagree,Strong Disagree,Agree,Disagree,Agree,Agree,Disagree,Agree,Disagree,Disagree,Disagree
10,21-25,M,China,PM,Computing,60%~70%,>70%,S0,Strong Disagree,Agree,Agree,Neither,Disagree,Strong Disagree,Strong Disagree,Strong Disagree,Agree,Agree,Strong Disagree,Strong Disagree


In [29]:
# One-column frame (column 0) holding the column labels of df.
d = pd.DataFrame(df.columns)

TypeError: __init__() got an unexpected keyword argument 'header'

In [23]:
# Use the values of column 0 as the row index.
d.index = d[0]

In [26]:
# Remove column 0 in place; its values remain available as the index.
d.drop(0,axis=1,inplace = True)


In [36]:
# Write the frame to a CSV file in the working directory.
d.to_csv('preprocessed test data.csv')

In [43]:
# Distinct values found in the Nationality column.
df['Nationality'].unique()

array(['Korea', 'China', 'Kenya', 'Vietnam', 'India', 'Saudi Arabia',
       'Tanzania', 'Oman', 'Jordan', 'Zimbabwe', 'Portugal', 'Georgia',
       'Singapore', 'Malaysia', 'Qatar', 'Turkey', 'Kuwait', 'OCEAN',
       'UK', 'Pakistan', 'Thailand', 'Indonesia', 'Trinidad and Tobago',
       'Colombia', 'Gongo', 'Ukraine'], dtype=object)

In [44]:
# For every column of df, the list of distinct values it contains.
row = [df[col].unique().tolist() for col in df]

In [49]:
# One row per column of df, labelled with the column name and holding that
# column's distinct values.
d = pd.DataFrame(row, index=df.columns)

In [51]:
# Write the per-column distinct values to a CSV file.
d.to_csv('preprocessed test data.csv')


In [57]:
# Read the ordinal category file: no header row, first column as the index.
d = pd.read_csv('preprocessed test data ordinal.csv',header=None, index_col=0)

In [58]:
# Notebook display of the ordinal category frame.
d

Unnamed: 0_level_0,1,2,3,4,5
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"You often feel that your life is aimless, with no definite purpose",Strong Disagree,Disagree,Neither,Agree,Strong Agree
You never have trouble organizing the things you have to do?,Strong Disagree,Disagree,Neither,Agree,Strong Agree
"Once you've started an activity, you persist at it until you've completed it",Strong Disagree,Disagree,Neither,Agree,Strong Agree
Sometimes you feel that the things you have to do during the day just don't seem to matter,Strong Disagree,Disagree,Neither,Agree,Strong Agree
You will plan your activities from day to day.,Strong Disagree,Disagree,Neither,Agree,Strong Agree
You tend to leave things to the last minute?,Strong Disagree,Disagree,Neither,Agree,Strong Agree
You tend to change rather aimlessly from one activity to another during the day.,Strong Disagree,Disagree,Neither,Agree,Strong Agree
You give up the things that you planning to do just because your friend says no.,Strong Disagree,Disagree,Neither,Agree,Strong Agree
You think you do enough with your time.,Strong Disagree,Disagree,Neither,Agree,Strong Agree
You are easy to get bored with your day-today activities.,Strong Disagree,Disagree,Neither,Agree,Strong Agree


# TEST DATA

In [63]:
# Build the feature dictionary from the ordinal file, then merge in the
# features read from the nominal file (same-named keys are overwritten by
# the nominal entry).
feat = read_ord('preprocessed test data ordinal.csv')
feat_nom = read_nom('preprocessed test data nominal.csv')
feat.update(feat_nom)

## TEST 1

In [64]:
# Dump every feature with its type, category count and category list.
for f, feature in feat.items():
    print('name:',f)
    print('type:',feature.feat_type)
    print('num_cat:',feature.num_cat)
    print()
    for cat, category in feature.categories.items():
        print('    index:',category.index)
        print('    name:',category.name)
        print()
    print()

name: You often feel that your life is aimless, with no definite purpose
type: ordinal
num_cat: 5

    index: 0
    name: Strong Disagree

    index: 1
    name: Disagree

    index: 2
    name: Neither

    index: 3
    name: Agree

    index: 4
    name: Strong Agree


name: You never have trouble organizing the things you have to do?
type: ordinal
num_cat: 5

    index: 0
    name: Strong Disagree

    index: 1
    name: Disagree

    index: 2
    name: Neither

    index: 3
    name: Agree

    index: 4
    name: Strong Agree


name: Once you've started an activity, you persist at it until you've completed it
type: ordinal
num_cat: 5

    index: 0
    name: Strong Disagree

    index: 1
    name: Disagree

    index: 2
    name: Neither

    index: 3
    name: Agree

    index: 4
    name: Strong Agree


name: Sometimes you feel that the things you have to do during the day just don't seem to matter
type: ordinal
num_cat: 5

    index: 0
    name: Strong Disagree

    index: 1
    

# Verdict: Working

# READ POPULATION

In [66]:
 df = pd.read_csv('International students Time management data.csv', index_col=0)

In [67]:
# Notebook display of the population frame.
df

Unnamed: 0_level_0,Age,Gender,Nationality,Program,Course,English,Academic,Attendance,"You often feel that your life is aimless, with no definite purpose",You never have trouble organizing the things you have to do?,"Once you've started an activity, you persist at it until you've completed it",Sometimes you feel that the things you have to do during the day just don't seem to matter,You will plan your activities from day to day.,You tend to leave things to the last minute?,You tend to change rather aimlessly from one activity to another during the day.,You give up the things that you planning to do just because your friend says no.,You think you do enough with your time.,You are easy to get bored with your day-today activities.,The important interests/activities in your life tend to change frequently.,You know how much time you spend on each of the homework I do.
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,31-35,M,Korea,PM,Social Sciences and Humanities,60%~70%,,S0,Disagree,Agree,Strong Agree,Neither,Agree,Neither,Disagree,Strong Disagree,Strong Agree,Neither,Disagree,Agree
2,26-30,M,China,PM,Science and engineering,60%~70%,50%~59%,S3,Strong Agree,Agree,Neither,Disagree,Agree,Neither,Disagree,Strong Disagree,Neither,Agree,Neither,Disagree
3,26-30,M,Kenya,PM,Business,60%~70%,,S0,Disagree,Strong Agree,Agree,Disagree,Agree,Agree,Disagree,Strong Disagree,Disagree,Strong Agree,Strong Agree,Disagree
4,21-25,M,Vietnam,PM,Law/Legal studies,60%~70%,60%~70%,S0,Disagree,Disagree,Agree,Agree,Disagree,Agree,Neither,Neither,Neither,Neither,Disagree,Agree
5,21-25,M,China,PM,Business,60%~70%,50%~59%,S1,Neither,Disagree,Neither,Neither,Disagree,Neither,Neither,Neither,Neither,Disagree,Neither,Agree
6,21-25,M,China,PM,Law/Legal studies,60%~70%,60%~70%,S0,Agree,Neither,Agree,Strong Agree,Disagree,Disagree,Strong Agree,Agree,Strong Agree,Strong Agree,Strong Disagree,Strong Agree
7,21-25,M,China,PM,Art and Design,60%~70%,60%~70%,S0,Disagree,Agree,Agree,Neither,Agree,Neither,Neither,Strong Disagree,Disagree,Strong Disagree,Strong Disagree,Neither
8,21-25,M,China,PM,Business,60%~70%,,S0,Agree,Neither,Neither,Agree,Agree,Neither,Agree,Neither,Agree,Agree,Agree,Agree
9,21-25,M,China,PM,Computing,60%~70%,40%~49%,S0,Disagree,Disagree,Strong Disagree,Agree,Disagree,Agree,Agree,Disagree,Agree,Disagree,Disagree,Disagree
10,21-25,M,China,PM,Computing,60%~70%,>70%,S0,Strong Disagree,Agree,Agree,Neither,Disagree,Strong Disagree,Strong Disagree,Strong Disagree,Agree,Agree,Strong Disagree,Strong Disagree


## TEST 2

In [70]:
# Fill in the occurrence probability of every category from the population.
feat = probability(df,feat)

In [71]:
# Dump every feature with the index, name and probability of its categories.
for f, feature in feat.items():
    print(f)
    print(feature.feat_type,feature.num_cat)
    for cat, category in feature.categories.items():
        print(category.index,category.name,category.prob)
    print()

You often feel that your life is aimless, with no definite purpose
ordinal 5
0 Strong Disagree 0.13008130081300814
1 Disagree 0.3902439024390244
2 Neither 0.21951219512195122
3 Agree 0.18699186991869918
4 Strong Agree 0.07317073170731707

You never have trouble organizing the things you have to do?
ordinal 5
0 Strong Disagree 0.056
1 Disagree 0.32
2 Neither 0.288
3 Agree 0.28
4 Strong Agree 0.056

Once you've started an activity, you persist at it until you've completed it
ordinal 5
0 Strong Disagree 0.008264462809917356
1 Disagree 0.12396694214876033
2 Neither 0.2892561983471074
3 Agree 0.47107438016528924
4 Strong Agree 0.10743801652892562

Sometimes you feel that the things you have to do during the day just don't seem to matter
ordinal 5
0 Strong Disagree 0.0423728813559322
1 Disagree 0.17796610169491525
2 Neither 0.4067796610169492
3 Agree 0.3474576271186441
4 Strong Agree 0.025423728813559324

You will plan your activities from day to day.
ordinal 5
0 Strong Disagree 0.0645161290

## TEST 3

In [72]:
# Fill in the entropy of every category from its probability.
feat = entropy(feat)

In [73]:
# Dump every feature with the index, name, probability and entropy of its
# categories.
for f, feature in feat.items():
    print(f)
    print(feature.feat_type,feature.num_cat)
    for cat, category in feature.categories.items():
        print(category.index,category.name,category.prob,category.entropy)
    print()

You often feel that your life is aimless, with no definite purpose
ordinal 5
0 Strong Disagree 0.13008130081300814 0.26531325309042425
1 Disagree 0.3902439024390244 0.3672130124739616
2 Neither 0.21951219512195122 0.33285676595884867
3 Agree 0.18699186991869918 0.3135274244487411
4 Strong Agree 0.07317073170731707 0.19133852034411206

You never have trouble organizing the things you have to do?
ordinal 5
0 Strong Disagree 0.056 0.1614146009418313
1 Disagree 0.32 0.36461897062027676
2 Neither 0.288 0.358500902067703
3 Agree 0.28 0.3564303892276085
4 Strong Agree 0.056 0.1614146009418313

Once you've started an activity, you persist at it until you've completed it
ordinal 5
0 Strong Disagree 0.008264462809917356 0.03963463260823753
1 Disagree 0.12396694214876033 0.2588107865075865
2 Neither 0.2892561983471074 0.3588056772211277
3 Agree 0.47107438016528924 0.3545961886978916
4 Strong Agree 0.10743801652892562 0.23967715244427815

Sometimes you feel that the things you have to do during th

## TEST 4

In [84]:
# Feature-level statistics. Order matters: reliability divides the feature
# entropy by the standard entropy, and the weights divide each reliability
# by their total.
feat = std_entropy(feat)
feat = feature_entropy(feat)
feat = reliability(feat)
total_rel = total_reliability(feat)
feat = weights(feat, total_rel)

In [85]:
# Dump every feature with its statistics, followed by the statistics of each
# of its categories.
for f, feature in feat.items():
    print('name:',f)
    print('type:',feature.feat_type)
    print('num_cat:',feature.num_cat)
    print('entropy:',feature.entropy)
    print('std_entropy:',feature.std_entropy)
    print('reliability:',feature.reliability)
    print()
    for cat, category in feature.categories.items():
        print('    index:',category.index)
        print('    name:',category.name)
        print('    prob:',category.prob)
        print('    entropy:',category.entropy)
        print()
    print()

name: You often feel that your life is aimless, with no definite purpose
type: ordinal
num_cat: 5
entropy: 1.4702489763160878
std_entropy: 1.6094379124341003
reliability: 0.9135170514856927

    index: 0
    name: Strong Disagree
    prob: 0.13008130081300814
    entropy: 0.26531325309042425

    index: 1
    name: Disagree
    prob: 0.3902439024390244
    entropy: 0.3672130124739616

    index: 2
    name: Neither
    prob: 0.21951219512195122
    entropy: 0.33285676595884867

    index: 3
    name: Agree
    prob: 0.18699186991869918
    entropy: 0.3135274244487411

    index: 4
    name: Strong Agree
    prob: 0.07317073170731707
    entropy: 0.19133852034411206


name: You never have trouble organizing the things you have to do?
type: ordinal
num_cat: 5
entropy: 1.402379463799251
std_entropy: 1.6094379124341003
reliability: 0.8713473523674512

    index: 0
    name: Strong Disagree
    prob: 0.056
    entropy: 0.1614146009418313

    index: 1
    name: Disagree
    prob: 0.32
    e

# TEST FUNCTIONS

In [128]:
def test_prob(feat):
    """
    Check that the category probabilities of every feature add up to 1.

    Parameters
    ----------
    feat : dictionary
        a dictionary of features with keys as feature name and values as
        feature objects exposing a ``categories`` dictionary whose values
        carry ``prob``.

    Returns
    -------
    str
        'Test Probability Passed' when every feature passes.

    Raises
    ------
    AssertionError
        naming the first feature whose probabilities do not sum to 1 within
        an absolute tolerance of 1e-9.
    """
    for name, feature in feat.items():
        total = sum(category.prob for category in feature.categories.values())
        # Floating-point sums rarely hit 1 exactly, hence the tolerance.
        assert abs(1 - total) <= 10**(-9), (
            'Probability is not 1 for ' + str(name) + ' (sum = ' + str(total) + ')'
        )
    return 'Test Probability Passed'


# The name matches pytest's test_* pattern but the function needs an
# argument; opt out of collection so a test run does not error on it.
test_prob.__test__ = False

def test_entropy(feat):
    """
    Check that the entropy of every category equals -p * log(p).

    Every category of every feature is checked. A category with probability
    0 is expected to have entropy 0.

    Parameters
    ----------
    feat : dictionary
        a dictionary of features with keys as feature name and values as
        feature objects exposing a ``categories`` dictionary whose values
        carry ``prob`` and ``entropy``.

    Returns
    -------
    str
        'Test Entropy Passed' when every category passes.

    Raises
    ------
    AssertionError
        on the first category whose entropy does not match.
    """
    for feature in feat.values():
        for category in feature.categories.values():
            p = category.prob
            # log(0) is undefined; a category that never occurs has entropy 0.
            expected = -1*p*math.log(p) if p else 0
            # The check belongs inside the loop: placed after it, only the
            # last category of each feature was ever verified.
            assert math.isclose(
                category.entropy, expected, rel_tol=1e-9, abs_tol=1e-12
            ), 'Entropy is not -plog(p)'
    return 'Test Entropy Passed'


# The name matches pytest's test_* pattern but the function needs an
# argument; opt out of collection so a test run does not error on it.
test_entropy.__test__ = False

In [129]:
# Run the probability check on the feature dictionary.
test_prob(feat)

'Test Probability Passed'

In [130]:
# Run the entropy check on the feature dictionary.
test_entropy(feat)

'Test Entropy Passed'