# Importing database

In [1]:
import pandas as pd
from rich.jupyter import display

In [2]:
# Load the simulated dataset and report its dimensions.
database = pd.read_csv('data/simulated_database.csv')
row_count, column_count = database.shape
print(f'{row_count} lines x {column_count} columns of simulated data')

459 lines x 3 columns of simulated data


In [3]:
# Preview the first five rows of the loaded data.
database.head(n=5)

Unnamed: 0,use_of_ai,ai_competence,digital_resource
0,4.4,4.35,4.08
1,4.19,3.35,3.62
2,4.63,3.14,3.75
3,4.43,4.36,3.65
4,4.11,4.89,3.0


# Results analysis

### Paper's table 2: Results of the descriptive analysis

In [4]:
from scipy.stats import pearsonr
from IPython.display import display

# Column name in `database` -> label shown in the tables.
variable_labels = dict(
    use_of_ai="Use of AI",
    ai_competence="AI Competence",
    digital_resource="Digital Resources",
)

variables = [*variable_labels]

# Collect one mean / standard deviation per variable, in label order.
stats = {"Variable": [], "Mean": [], "Standard Deviation": []}
for column_name, label in variable_labels.items():
    column = database[column_name]
    stats["Variable"].append(label)
    stats["Mean"].append(column.mean())
    stats["Standard Deviation"].append(column.std())

stats_table = pd.DataFrame(stats)

display(stats_table)

Unnamed: 0,Variable,Mean,Standard Deviation
0,Use of AI,4.567298,0.41009
1,AI Competence,4.491111,0.635412
2,Digital Resources,3.970196,0.41703


### Paper's table 3: Results of the descriptive correlation analysis

In [5]:
def _correlation_cell(row_var, col_var):
    """Return the table entry for one pair of variables.

    The diagonal is the literal "1"; every other entry is Pearson's r to three
    decimals, suffixed with "**" when its p-value is below 0.01.
    """
    if row_var == col_var:
        return "1"
    coefficient, p_value = pearsonr(database[row_var], database[col_var])
    stars = "**" if p_value < 0.01 else ""
    return f"{coefficient:.3f}{stars}"


row_labels = [variable_labels[name] for name in variables]

# One column per variable, filled top to bottom in the same order as the rows.
correlation_data = {
    variable_labels[col_var]: [_correlation_cell(row_var, col_var) for row_var in variables]
    for col_var in variables
}

correlation_table = pd.DataFrame(correlation_data, index=row_labels)

display(correlation_table)

Unnamed: 0,Use of AI,AI Competence,Digital Resources
Use of AI,1,0.362**,0.338**
AI Competence,0.362**,1,0.232**
Digital Resources,0.338**,0.232**,1


In [6]:
import statsmodels.api as sm

def train_model(y_column, x_columns, data):
    """Fit an ordinary least squares regression with statsmodels.

    Args:
        y_column: Name of the column in ``data`` used as the response.
        x_columns: List of column names in ``data`` used as predictors.
        data: DataFrame holding both the response and the predictors.

    Returns:
        The fitted results object returned by ``sm.OLS(...).fit()``.
    """
    # add_constant supplies the intercept term alongside the predictors.
    design_matrix = sm.add_constant(data[x_columns])
    response = data[y_column]
    return sm.OLS(response, design_matrix).fit()

# Each model regresses one variable on the other two.
use_of_ai_model = train_model(
    y_column="use_of_ai",
    x_columns=["ai_competence", "digital_resource"],
    data=database,
)
digital_resource_model = train_model(
    y_column="digital_resource",
    x_columns=["ai_competence", "use_of_ai"],
    data=database,
)

### Paper's table 4: Multiple regression adjustment model

In [7]:
def generate_model_line(model_name, model):
    """Build one row of the model-summary table for a fitted regression.

    Args:
        model_name: Label written to the "Model" column.
        model: Fitted results object exposing ``rsquared``, ``rsquared_adj``,
            ``mse_resid``, ``fvalue`` and ``f_pvalue``.

    Returns:
        A dict with the keys "Model", "R", "R Square", "Adjusted R Square",
        "Standard Error", "F Statistic" and "Sig. F Change". Numeric entries
        are rounded to three decimals; "Sig. F Change" is the string
        "< 0.001" when the F-test p-value is below 0.001.
    """
    r_squared = model.rsquared
    f_pvalue = model.f_pvalue

    # Test for "below the threshold" rather than "at or above it": a NaN
    # p-value fails every comparison and must not be reported as "< 0.001".
    if f_pvalue < 0.001:
        significance = "< 0.001"
    else:
        # Rounded like every other statistic in the row.
        significance = round(f_pvalue, 3)

    return {
        "Model": model_name,
        "R": round(r_squared**0.5, 3),
        "R Square": round(r_squared, 3),
        "Adjusted R Square": round(model.rsquared_adj, 3),
        # Square root of the residual mean square.
        "Standard Error": round(model.mse_resid**0.5, 3),
        "F Statistic": round(model.fvalue, 3),
        "Sig. F Change": significance,
    }

# Table label -> fitted model, in the order the rows should appear.
fitted_models = {
    "Use of AI": use_of_ai_model,
    "Digital Resources": digital_resource_model,
}

lines = [generate_model_line(label, fitted) for label, fitted in fitted_models.items()]

regression_table = pd.DataFrame(lines)
display(regression_table)

Unnamed: 0,Model,R,R Square,Adjusted R Square,Standard Error,F Statistic,Sig. F Change
0,Use of AI,0.446,0.199,0.196,0.368,56.742,< 0.001
1,Digital Resources,0.358,0.128,0.124,0.39,33.482,< 0.001


In [8]:
# Pull the fitted coefficients of the "Use of AI" model out by name.
use_of_ai_params = use_of_ai_model.params
intercept_use_of_ai = use_of_ai_params.loc["const"]
beta1_use_of_ai = use_of_ai_params.loc["ai_competence"]
beta2_use_of_ai = use_of_ai_params.loc["digital_resource"]

## The regression equation

In [9]:
# Coefficients are shown with one decimal place.
equation = (
    f"Use of AI = {intercept_use_of_ai:.1f}"
    f" + [{beta1_use_of_ai:.1f} * (AI Competence)]"
    f" + [{beta2_use_of_ai:.1f} * (Digital Resources)]"
)
print("Regression Equation:", equation, sep="\n")

Regression Equation:
Use of AI = 2.6 + [0.2 * (AI Competence)] + [0.3 * (Digital Resources)]


## Comparing predicted and actual values

In [10]:
# Rebuild the fitted values by hand from the extracted coefficients, keeping
# two decimals.
competence_term = beta1_use_of_ai * database["ai_competence"]
resources_term = beta2_use_of_ai * database["digital_resource"]
database["original_predicted_use_of_ai"] = (
    intercept_use_of_ai + competence_term + resources_term
).round(2)

# Absolute gap between the observed value and the rounded prediction.
residual = database["use_of_ai"] - database["original_predicted_use_of_ai"]
database["original_absolute_error"] = residual.abs().round(2)

comparison_columns = ["use_of_ai", "original_predicted_use_of_ai", "original_absolute_error"]
database[comparison_columns].head()

Unnamed: 0,use_of_ai,original_predicted_use_of_ai,original_absolute_error
0,4.4,4.57,0.17
1,4.19,4.25,0.06
2,4.63,4.25,0.38
3,4.43,4.46,0.03
4,4.11,4.39,0.28


In [11]:
# NOTE: this cell rebinds variable_labels, variables, stats and stats_table,
# which were first defined for the Table 2 summary; re-run that cell before
# re-running the correlation cell.
variable_labels = dict(
    use_of_ai="Use of AI",
    original_predicted_use_of_ai="Original predicted use of AI",
    original_absolute_error="Original absolute error",
)

variables = [*variable_labels]

# Observed value, hand-computed prediction and absolute error, in label order.
summarised_columns = [database[name] for name in variables]

stats = {
    "Variable": list(variable_labels.values()),
    "Mean": [column.mean() for column in summarised_columns],
    "Standard Deviation": [column.std() for column in summarised_columns],
}

stats_table = pd.DataFrame(stats)

display(stats_table)

Unnamed: 0,Variable,Mean,Standard Deviation
0,Use of AI,4.567298,0.41009
1,Original predicted use of AI,4.567473,0.183066
2,Original absolute error,0.302092,0.207743


# Purpose of our change