In [6]:
import pandas as pd
import numpy as np
from scipy import stats
import os

In [None]:
base_dir=os.getcwd()
if os.name == 'nt':
    path_break='\\'
else:
    path_break='/'

In [7]:
# mental_effort_df for mean and stdev for all mental effort
def dicts_for_zscore_calc(df):
    list_of_mental_demands = [df['Q24_1'], df['Q25_1'], df['Q26_1'], df['Q27_1'], df['Q28_1']]
    list_of_temporal_demands = [df['Q24_2'], df['Q25_2'], df['Q26_2'], df['Q27_2'], df['Q28_2']]
    list_of_efforts = [df['Q24_3'], df['Q25_3'], df['Q26_3'], df['Q27_3'], df['Q28_3']]
    list_of_frustrations = [df['Q24_4'], df['Q25_4'], df['Q26_4'], df['Q27_4'], df['Q28_4']]

    mean_and_stdev_dict = {}
    list_of_mental_efforts = ['Mental Demand', 'Temporal Demand', 'Effort', 'Frustration']
    list_of_mental_effort_lists = [list_of_mental_demands, list_of_temporal_demands, list_of_efforts, list_of_frustrations]

    for index in range(0, 4):
        temp_series = pd.concat(list_of_mental_effort_lists[index])
        mean_and_stdev_dict[list_of_mental_efforts[index]] = {
            'Mean': temp_series.mean(), 
            'StDev': temp_series.std()
        }

    return mean_and_stdev_dict

def special_zscore_calc(series, mean_and_stdev_dict):
    col_effort_type = series.name[-2:]
    conversion_dict = {'_1': 'Mental Demand', '_2': 'Temporal Demand', '_3': 'Effort', '_4': 'Frustration'}
    temp_dict = mean_and_stdev_dict[conversion_dict[col_effort_type]]
    return (series - temp_dict['Mean']) / temp_dict['StDev']

In [8]:
# Note - Fix normalizing. Normalize across each mental effort, not in isolation. (E.g., put all Mental Demand together to get Mean and StDev, then manually calculate)
# What I need for calc
    # Z_i(P_test) - standardized test performance
    # Z_i(E_test) - standardized mental effort

# create dict of Q to Z_Q, then calculate

understanding_expectations = ['Q24_1', 'Q24_2', 'Q24_3', 'Q24_4']
initial_post = ['Q25_1', 'Q25_2', 'Q25_3', 'Q25_4']
reading_posts = ['Q26_1', 'Q26_2', 'Q26_3', 'Q26_4']
replying_to_posts = ['Q27_1', 'Q27_2', 'Q27_3', 'Q27_4']
integrate_feedback = ['Q28_1', 'Q28_2', 'Q28_3', 'Q28_4']

all_IE_cols = understanding_expectations + initial_post + reading_posts + replying_to_posts + integrate_feedback

def calc_IE(df, name):
    df[f'Z_Discussions Final Score'] = stats.zscore(df['Discussions Final Score'], nan_policy='omit')
    mean_and_stdev_dict = dicts_for_zscore_calc(df)
    for col_name in all_IE_cols:
        df[f'Z_{col_name}'] = special_zscore_calc(df[col_name], mean_and_stdev_dict)
        df[f'E_{col_name}'] = (df[f'Z_Discussions Final Score'] - df[f'Z_{col_name}']) / np.sqrt(2)

    all_dicts = []

    for index in range(24, 29):
        temp_dict = {
            'Mental Demand': df[f'E_Q{index}_1'].sum() / len(df[f'E_Q{index}_1']), 
            'Temporal Demand': df[f'E_Q{index}_2'].sum() / len(df[f'E_Q{index}_2']), 
            'Effort': df[f'E_Q{index}_3'].sum() / len(df[f'E_Q{index}_3']),
            'Frustration': df[f'E_Q{index}_4'].sum() / len(df[f'E_Q{index}_4']), 
        }
        all_dicts.append(temp_dict)

    df_E = pd.DataFrame(all_dicts, index=(['Understanding what is expected', 'Crafting your initial discussion post', 'Critically reading posts from your instructor and peers', 'Creating reply posts', 'Integrating instructor feedback into future discussion posts']))
    df_E.to_excel(f'{base_dir}{path_break}Tables{path_break}instructional_efficiency_chart_{name}.xlsx')

In [16]:
# CoI instrument
# load factors
def load_factors():
    factor_coeffs_df = pd.read_excel('Loading coefficients.xlsx')
    factor_coeffs_df = factor_coeffs_df[['Column Names', 'Weights']]
    factor_coeffs_df.dropna(subset=['Column Names'], inplace=True)
    factors_coeff_dict = dict(zip(factor_coeffs_df['Column Names'], factor_coeffs_df['Weights']))
    return factors_coeff_dict

def calculate_presence_scores(df, factor_coeffs_dict):
    for key in factor_coeffs_dict.keys():
        df[key] = df[key] * factor_coeffs_dict[key]

    teaching_presence_cols = ['Q17_1', 'Q17_2', 'Q17_3', 'Q17_4', 'Q18_1', 'Q18_2', 'Q18_3', 'Q18_4', 'Q18_5', 'Q19_1', 'Q19_2', 'Q19_3']
    df['Teaching Presence'] = df[teaching_presence_cols].sum(axis=1)

    social_presence_cols = ['Q19_4', 'Q19_5', 'Q20_1', 'Q20_2', 'Q20_3', 'Q20_4', 'Q20_5', 'Q21_1', 'Q21_2']
    df['Social Presence'] = df[social_presence_cols].sum(axis=1)

    cognitive_presence_cols = ['Q21_3', 'Q21_4', 'Q21_5', 'Q22_1', 'Q22_2', 'Q22_3', 'Q22_4', 'Q22_5', 'Q23_1', 'Q23_2', 'Q23_3', 'Q23_4']
    df['Cognitive Presence'] = df[cognitive_presence_cols].sum(axis=1)
    return df

def correl_matrix(df, name):
    correl_cols = all_IE_cols + ['Teaching Presence', 'Social Presence', 'Cognitive Presence'] + ['Discussions Final Score', 'Final Score']
    reduced_df = df[correl_cols]
    correl_matrix = reduced_df.corr()
    correl_matrix.to_excel(f'{base_dir}{path_break}Tables{path_break}correlation_matrix_{name}.xlsx')

In [18]:
final_df = pd.read_excel('clean_data_final.xlsx')

factors_coeff_dict = load_factors()
final_df = calculate_presence_scores(final_df, factors_coeff_dict)

pre_df = final_df[final_df['PRE or POST'] == 'PRE']
post_df = final_df[final_df['PRE or POST'] == 'POST']

calc_IE(pre_df, 'PRE')
calc_IE(post_df, 'POST')
calc_IE(final_df, 'ALL')

correl_matrix(pre_df, 'PRE')
correl_matrix(post_df, 'POST')
correl_matrix(final_df, 'ALL')

final_df.to_excel('in_analysis.xlsx')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'Z_Discussions Final Score'] = stats.zscore(df['Discussions Final Score'], nan_policy='omit')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'Z_{col_name}'] = special_zscore_calc(df[col_name], mean_and_stdev_dict)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'E_{col_name}'] = (df[f'Z_