# Other Substances Data From Archival Cannabis Survey

This Jupyter notebook analyzes the "other substances" section of the archival data provided by Endres et al., which comes from a survey they ran in 2021 on cannabis use in programming.

In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy.stats import ttest_ind, binom_test, spearmanr
from statsmodels.stats.proportion import proportions_ztest
import matplotlib as mpl

# Make the project's helper module importable from this notebook.
import sys
# Insert at index 1 because index 0 is the script path (or '' in a REPL);
# this puts the HelperFunctions directory on the module search path so the
# `HelperStatsFunctions` import below can be resolved.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine without editing it.
sys.path.insert(1, '/Users/BLINDED/Survey Analysis/HelperFunctions/')

import HelperStatsFunctions as hs

scipy.__version__

'1.8.1'

# Load Data

The cells below load the data I will be analyzing

In [21]:
# Read the three survey exports (coded/numeric, categorical, and free-text).
# NOTE(review): the paths are absolute and machine-specific.

raw_coded_data = pd.read_csv('/Users/BLINDED/Survey Analysis/Data/CodedData.csv')
raw_cat_data = pd.read_csv('/Users/BLINDED/Survey Analysis/Data/CategoricalData.csv')
raw_qualitative_data = pd.read_csv('/Users/BLINDED/Survey Analysis/Data/QualitativeData.csv')

# Keep only the rows whose IsValid flag is True.
num_data = raw_coded_data.loc[raw_coded_data['IsValid'].eq(True)]
cat_data = raw_cat_data.loc[raw_cat_data['IsValid'].eq(True)]

# For the free-text export, keep rows where Q7.6 is present and is not the
# "-99" placeholder, and retain only the respondent id and the answer itself.
q76_answers = raw_qualitative_data['Q7.6']
has_q76_answer = q76_answers.ne("-99") & q76_answers.notnull()
qual_data = raw_qualitative_data.loc[has_q76_answer, ['ResponseId', 'Q7.6']]

print(num_data.shape, cat_data.shape, qual_data.shape)


# Number of valid respondents; used as the denominator for percentages below.
total_data_points = num_data.shape[0]

(803, 376) (803, 444) (247, 2)


# Quantitative Analysis of Other Substances


This section looks at the prevalence of other substance use, both in general and while programming.

In [22]:
# Meta variables for this section
# Survey questions analysed below:
#   Q7.1: Have you used any of these drugs in the last year
#   Q7.2: Have you used any mind altering substances other than cannabis while programming in the last year?
#   Q7.3: Which ones have you used?
#   Q7.4: Are you more or less likely to use it than cannabis while programming?

# Maps the substance number (as a string, because it is concatenated onto
# column prefixes such as 'Q7.1_') to its display name. The spellings
# 'Caffine' and 'Inhalents' are kept as-is: other cells compare against
# "Caffine" to skip that entry.
drugs = {
    str(code): name
    for code, name in enumerate(
        [
            'Alcohol',
            'Tobacco',
            'Caffine',
            'Amphetamine',
            'Inhalents',
            'Cocaine',
            'Opioids',
            'Sedatives/Sleeping Pills',
            'Hallucinogens',
            'Other',
        ],
        start=1,
    )
}

### Have used various substances at all in the past year

In [23]:
# General substance use in the last year

latex_table = [['Substance', '4+ per week', '1-3 per week', '1-3 per month', '< 1 per month', 'not in last year', 'no response']]
cannabis_val_counts = num_data['Q5.3'].value_counts()
total_data_points = num_data.shape[0]


def _count_codes(val_counts, codes):
    """Sum the counts of the given response codes; a code that never occurs counts as 0."""
    return sum(val_counts.get(code, 0) for code in codes)


def _as_percent_row(row, total):
    """Return `row` with every cell between the label (first) and the
    'no response' count (last) converted to a percentage of `total`,
    rounded to two decimals. The first and last cells are left untouched."""
    return [row[0]] + [round(count / total * 100, 2) for count in row[1:-1]] + [row[-1]]


# Cannabis uses a finer-grained frequency scale (Q5.3) than the other
# substances, so several codes are merged into each table column. Missing
# codes are treated as 0 instead of raising a TypeError on `None + int`.
cannabis_counts = [
    _count_codes(cannabis_val_counts, (10, 11, 12)),  # 5 or more times per week
    _count_codes(cannabis_val_counts, (7, 8, 9)),     # once to 4 times a week (note 9 is the one making this different than the other substances)
    _count_codes(cannabis_val_counts, (5, 6)),        # 1-3 times per month
    _count_codes(cannabis_val_counts, (2, 3, 4)),     # once per year to less than once per month
    # "less than once per year / not in last year": Q5.3 codes 0 and 1 plus
    # the respondents coded -1 on Q5.1
    _count_codes(cannabis_val_counts, (0, 1)) + num_data['Q5.1'].value_counts().get(-1, 0),
    _count_codes(cannabis_val_counts, (-99,)),        # saw the question but did not answer
]

# Every valid respondent must land in exactly one cannabis bucket
assert sum(cannabis_counts) == total_data_points

# Percentages of all respondents; the trailing 'no response' cell stays a raw count
first_row = _as_percent_row(['Cannabis'] + cannabis_counts, total_data_points)
latex_table.append(first_row)

# Add in the non cannabis drugs (Q7.1_<n>: codes 4..0 map onto the table columns left to right)
for k, v in drugs.items():
    if v == "Caffine":
        continue  # skip caffine
    current_val_counts = num_data['Q7.1_' + k].value_counts()
    current_row = [v] + [current_val_counts.get(code, 0) for code in (4, 3, 2, 1, 0, -99)]
    current_row = _as_percent_row(current_row, total_data_points)
    latex_table.append(current_row)

print('Percentage of participants who have used a given substance in the last year - note cannabis gaps are slightly different in year breaks')
hs.print_latex(latex_table, False)


Percentage of participants who have used a given substance in the last year - note cannabis gaps are slightly different in year breaks
               Substance  4+ per week  1-3 per week  1-3 per month  < 1 per month  not in last year  no response 
                Cannabis        16.31         15.19           8.47          12.45             47.45            1 
                 Alcohol        11.71         30.26          25.53          16.31             15.82            3 
                 Tobacco         4.23          1.99           2.99           9.71             80.82            2 
             Amphetamine         3.61          0.25           1.12           4.48             90.16            3 
               Inhalents          0.0           0.0           0.25           1.37             98.13            2 
                 Cocaine         0.12           0.0           0.25           2.74             96.51            3 
                 Opioids         0.12           0.0           0.12 

### Have used any substance while programming in the last year

In [24]:
latex_table = [['Response', 'Percent', 'Raw Number']]

# Response counts for Q7.2 (used a substance other than cannabis while programming).
# Based on how the table rows are built below: 1 = yes, -1 = no, 0 = unsure, -99 = no response.
substance_val_counts = num_data['Q7.2'].value_counts() # original author's note: 431
# Number of "yes" respondents whose substances were not only caffeine.
# NOTE(review): hand-entered constant obtained outside the notebook; it will
# silently go stale if the underlying data changes.
substance_val_counts_minus_caffine = 399 # Calculated using qualtrics filtering

# Treat a response code that never occurs as a count of 0 rather than None,
# so the arithmetic below cannot fail with a TypeError.
q72_yes = substance_val_counts.get(1, 0)
q72_no = substance_val_counts.get(-1, 0)
q72_unsure = substance_val_counts.get(0, 0)
q72_no_response = substance_val_counts.get(-99, 0)

# Need the number who have not used
# Now, we are adding cannabis to these values: respondents with a real Q6.02
# answer other than 1, and of those, the ones who answered "no" to Q7.2.
has_used_cannabis_in_last_year = num_data[(num_data['Q6.02'] != 1)&(num_data['Q6.02'].notnull())&(num_data['Q6.02'] != -99)]
has_only_used_cannabis_in_last_year = has_used_cannabis_in_last_year[has_used_cannabis_in_last_year['Q7.2'] == -1]
only_cannabis_count = has_only_used_cannabis_in_last_year.shape[0]

print(has_used_cannabis_in_last_year.shape[0], only_cannabis_count)

# "Yes" = non-caffeine-only yes answers plus the cannabis-only respondents.
has_used = substance_val_counts_minus_caffine + only_cannabis_count
# "No" = explicit no answers without the cannabis-only respondents, plus the
# yes answers that were excluded by the caffeine filter.
has_not_used = q72_no - only_cannabis_count + q72_yes - substance_val_counts_minus_caffine

# Now we are building the table: one row per response with percent of all respondents and raw count
for label, count in (('Yes', has_used),
                     ('No', has_not_used),
                     ('Unsure', q72_unsure),
                     ('No response', q72_no_response)):
    latex_table.append([label, round(count / total_data_points * 100, 2), count])

print('Percentage of participants who used a mind-altering substances while completing a software engineering-related task in the last year')
hs.print_latex(latex_table, False)


205 74
Percentage of participants who used a mind-altering substances while completing a software engineering-related task in the last year
   Response  Percent  Raw Number 
        Yes     58.9         473 
         No     40.1         322 
     Unsure      0.5           4 
No response      0.5           4 


### Have used various substances while programming at all during the last year

In [25]:
print("Percentage of responses that have used a substance while programming in the last year")

# Use the actual number of valid respondents instead of a hard-coded 803
total_data_points = num_data.shape[0]

latex_table = [['Substance', '% all when programming', '% users when programming']]
for k, v in drugs.items():
    if v == "Caffine": continue # do not include caffine
    # The 'Other' entry (key '10') is read from the Q7.3 column with suffix 14
    column_suffix = '14' if k == '10' else k
    current_val_counts = num_data['Q7.3_' + column_suffix].value_counts()

    # Code 1 = substance selected; -99 is the only other code counted here.
    # Codes that never occur are treated as 0.
    selected = current_val_counts.get(1, 0)
    not_selected = current_val_counts.get(-99, 0)
    answered = selected + not_selected

    # Share among respondents with either code; guard against a column that
    # has neither code, which previously raised ZeroDivisionError.
    pct_of_users = round(selected / answered * 100, 2) if answered else 0.0
    pct_of_all = round(selected / total_data_points * 100, 2)

    current_row = [v, pct_of_all, pct_of_users]
    latex_table.append(current_row)

hs.print_latex(latex_table, False)

# NOTE(review): 195 / 803 is about 24.28%, not 24.4%; 24.4% matches a
# denominator of 799. Confirm which denominator was intended before citing.
print("Did cannabis using qualtrics filters - found 195 had used cannabis while programing in last year, or 24.4%")

Percentage of responses that have used a substance while programming in the last year
               Substance  % all when programming  % users when programming 
                 Alcohol                   24.53                     50.26 
                 Tobacco                    5.73                     50.55 
             Amphetamine                    4.73                     69.09 
               Inhalents                     0.0                       0.0 
                 Cocaine                    0.12                      5.26 
                 Opioids                    0.37                      30.0 
Sedatives/Sleeping Pills                    0.87                      35.0 
           Hallucinogens                    2.12                     22.08 
                   Other                    1.74                      5.74 
Did cannabis using qualtrics filters - found 195 had used cannabis while programing in last year, or 24.4%
