# Mental Health in the Tech Industry Data Presentation and Visualization

Let's create a class for data presentation and visualization based on the Data_Processing module which gather and process the data form mental_health database.

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import Data_Processing as dp

In [2]:
test = dp.DataProcessing('DB/', 'mental_health')

In [3]:
test.get_users_no_for_q_and_answer(2, 'Male')

3044

In [25]:
def show_answers_pct_for_questions(q_ids):
    '''  Show percentage of answers on given questions. '''
    for qid in q_ids:
        df = get_answers_pct(qid)
        print(df.name)
        print(df)
        print()

In [26]:
def get_answers_pct(q_id):
    ''' Get percentage of answers on given question.
        Returns data frame answers*[answers [%], answers no]'''
    q_text = test.get_question_text(q_id)
    df = pd.DataFrame(columns=['answers [%]', 'answers no'])
    df.name = f'Question {q_id}. {q_text} [%] and sum.'
    for anspct, anssum in zip(test.get_answers_distribution_for_q(q_id), test.get_users_no_per_answer(q_id)):
        df.loc[anspct[0]] = [anspct[1], anssum[1]]
    return df

In [27]:
q_list = [6, 7, 9, 28, 30, 32, 33, 54, 78]
show_answers_pct_for_questions(q_list)

Question 6. Do you have a family history of mental illness? [%] and sum.
              answers [%]  answers no
I don't know        15.39       649.0
No                  40.33      1701.0
Yes                 44.29      1868.0

Question 7. Have you ever sought treatment for a mental health disorder from a mental health professional? [%] and sum.
   answers [%]  answers no
0        42.82      1806.0
1        57.18      2412.0

Question 9. Is your employer primarily a tech company/organization? [%] and sum.
     answers [%]  answers no
0          19.58       826.0
1          68.47      2888.0
n/a        11.95       504.0

Question 28. Would you have been willing to discuss your mental health with your direct supervisor(s)? [%] and sum.
                                      answers [%]  answers no
I don't know                                 6.42       190.0
No, at none of my previous employers        14.06       416.0
No, none of my previous supervisors         16.40       485.0
Some of my

In [15]:
def show_answers_pct_yearly_for_questions(q_ids):
    '''  Show percentage of answers on given questions yearly. '''
    for qid in q_ids:
        df = get_answers_pct_yearly(qid)
        print(df.name)
        print(df)
        print()

In [16]:
def get_answers_pct_yearly(q_id):
    ''' Get percentage of answers on given question yearly.
        Returns data frame answers*years'''
    q_text = test.get_question_text(q_id)
    df = pd.DataFrame()
    df.name = f'Question {q_id}. {q_text} [%]'
    for answer, year, pct in test.get_answers_distribution_for_q_by_years(q_id):
        df.loc[answer, year] = pct
#     print(df.name)
#     print(df)
#     print()
    return df

In [17]:
q_list = [2, 6, 7, 9, 28, 30, 32, 33, 54, 78]
show_answers_pct_yearly_for_questions(q_list)

Question 2. What is your gender? [%]
             2014   2016   2017   2018   2019
Female      19.60  23.45  28.84  29.98  27.84
Male        78.65  73.76  66.40  63.79  64.77
Non-binary   0.08   0.42   0.79   0.48   1.14
Other        1.67   2.16   2.25   5.04   4.83
n/a           NaN   0.21   1.72   0.72   1.42

Question 6. Do you have a family history of mental illness? [%]
               2014   2016   2017   2018   2019
No            60.95  34.05  29.63  27.34  30.40
Yes           39.05  46.76  44.84  49.88  45.17
I don't know    NaN  19.19  25.53  22.78  24.43

Question 7. Have you ever sought treatment for a mental health disorder from a mental health professional? [%]
    2014   2016   2017   2018   2019
0  49.44  41.45  39.68  36.93  38.35
1  50.56  58.55  60.32  63.07  61.65

Question 9. Is your employer primarily a tech company/organization? [%]
      2014   2016   2017   2018   2019
0    18.17  18.35  18.78  22.78  27.56
1    81.83  61.62  66.27  63.79  58.81
n/a    NaN  20.03

In [110]:
def show_positive_answers_ratio_by_categories_for_questions(q_ids):
    ''' Show the ratio of positive answers on one questions by categories from another questions.
        Each element of q_ids list should includes two question:
        first one determine the categories
        second the actual question, e.g. [8, 7].'''
    for cat_q, pos_q in q_ids:
        df = get_positive_answers_ratio_by_categories(cat_q, pos_q)
        cat_text = test.get_question_text(cat_q)
        pos_text = test.get_question_text(pos_q)
        print(f'Categories determined by question {cat_q}:')
        print(cat_text)
        print(f'Ratio of the following question {pos_q}:')
        print(pos_text)
        print(df)
        print()

In [111]:
def get_positive_answers_ratio_by_categories(q_cat, q_pos):
    ''' Get the ratio of positive answers on one question (q_pos) by categories from another question (q_cat).
        Positive means 1 or "yes". q_pos needs to be divalent otherwise the results 
        Returns the data frame categories*[answers [%], answers no]'''
    answers = [answer[0]+' [%]' for answer in test.get_all_answers_per_q(q_pos)]
    tabs = []
    answers_no = []
    df = pd.DataFrame(columns=answers+['answers no'])
    for answer in answers:
        tabs.append(test.get_some_answer_no_based_on_answer_and_q(q_cat, q_pos, answer[:-4]))
        for row in test.get_some_answer_no_based_on_answer_and_q(q_cat, q_pos, answer[:-4]):
            try:
                df.loc[row[0], answer] += row[1]
            except KeyError:
                df.loc[row[0]] = 0
                df.loc[row[0], answer] += row[1]
    df['answers no'] = df[answers].sum(axis=1)
    df[answers] = df[answers].divide(df['answers no'], axis=0)*100
    df[answers] = df[answers].apply(lambda x: round(x.astype('float'), 2), axis=1)
    return df

In [112]:
show_positive_answers_ratio_by_categories_for_questions([[8, 32]])

Categories determined by question 8:
How many employees does your company or organization have?
Ratio of the following question 32:
Have you had a mental health disorder in the past?
                Yes [%]  Possibly [%]  No [%]  n/a [%]  Don't Know [%]  \
1-5               40.22         19.57   36.96     0.00            3.26   
100-500           49.02         17.48   27.94     0.49            5.07   
26-100            45.98         17.20   34.02     0.19            2.62   
500-1000          43.01         22.04   30.11     0.54            4.30   
6-25              45.36         17.04   33.83     0.25            3.51   
More than 1000    51.27         14.76   29.21     0.95            3.81   
n/a               49.60         20.24   26.59     0.60            2.98   

                answers no  
1-5                   92.0  
100-500              612.0  
26-100               535.0  
500-1000             186.0  
6-25                 399.0  
More than 1000       630.0  
n/a                  