In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


# 1) Recreating the diffusions from the raw data


In [None]:
import matplotlib.pyplot as plt
import numpy as np


# Shared matplotlib look for the figures in this notebook. The option groups
# below are flattened, in the order written, into the single `custom_style`
# mapping that is then pushed into matplotlib's global rcParams.
custom_style = {
    rc_key: rc_value
    for rc_group in (
        # Font sizes
        {
            "axes.labelsize": 25,
            "axes.titlesize": 20,
            "xtick.labelsize": 20,
            "ytick.labelsize": 20,
        },
        # Line and marker styles
        {
            "lines.linewidth": 3,
            "lines.markersize": 8,
            "lines.color": "black",
            "errorbar.capsize": 5,
        },
        # Axes & spines
        {
            "axes.edgecolor": "black",
            "axes.linewidth": 2,
        },
        # Tick styling
        {
            "xtick.color": "black",
            "ytick.color": "black",
            "xtick.major.width": 1.2,
            "ytick.major.width": 1.2,
        },
        # Grid
        {
            "axes.grid": True,
            "grid.color": "gray",
            "grid.linewidth": 0.7,
            "grid.linestyle": "--",
            "grid.alpha": 0.6,
        },
        # Figure settings
        {
            "figure.figsize": (12, 8),
            "figure.dpi": 300,
            "figure.facecolor": "white",
        },
    )
    for rc_key, rc_value in rc_group.items()
}

# Make the style the default for every figure created from here on.
plt.rcParams.update(custom_style)


In [None]:
# Directory that holds the input data files read by the cells below.
# The path is relative, so it is resolved against the current working directory.
path_to_data="../01_AI_paper_final/data"

In [None]:
import pandas as pd
# Load the raw dataset from the zipped CSV in the data directory.
raw_data_file = os.path.join(path_to_data, "raw_data_encrypted.csv.zip")
df = pd.read_csv(raw_data_file)

# Show the first rows as the cell output.
df.head()

In [None]:
# Preview the first rows of `df` (same call as at the end of the loading cell).
df.head()



In [None]:
import pandas as pd
import matplotlib.pyplot as plt


# Parse the date column; entries that cannot be parsed become NaT.
df['date'] = pd.to_datetime(df['date'], errors='coerce')

# Tag every row with the calendar quarter of its date.
df['quarter'] = df['date'].dt.to_period('Q')

# Average AI share within each quarter (the mean skips NaN values).
mean_per_quarter_df = df.groupby('quarter')['ai_share'].agg('mean')

# Draw the quarterly means in chronological order as a line with point markers.
mean_per_quarter_df.sort_index().plot.line(
    title='Mean AI Share per Quarter (Dataset df)',
    xlabel='Quarter',
    ylabel='Mean AI Share',
    marker='o',
)

plt.show()


In [None]:
# NOTE: this cell used to contain only the bare expression `y`. No earlier cell
# defines that name, so the cell raised NameError and stopped "Restart & Run All".
# The stray expression has been removed; the cell is intentionally a no-op.

# 2) Country diffusion
 

In [None]:
# Export call kept for reference (disabled):
# fin_d.to_csv("~/Downloads/country_functions.csv")

# Read the country-level table from the data directory and print its first rows.
country_functions_file = os.path.join(path_to_data, "country_functions.csv")
fin_d = pd.read_csv(country_functions_file)
print(fin_d.head())

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import t

# Confidence-interval half-width of the mean, used as the error bar for ai_share.
def calculate_standard_error(values, confidence=0.95):
    """Return the half-width of the t-based confidence interval of the mean.

    Despite the name, the result is not the plain standard error: it is the
    standard error of the mean multiplied by the two-sided Student-t critical
    value, i.e. ``t_crit * s / sqrt(n)`` with ``s`` the sample standard
    deviation (``ddof=1``).

    NaN entries are dropped before anything is computed, so the sample matches
    what a NaN-skipping mean (such as pandas' ``"mean"`` aggregation) uses.
    Previously a single NaN made the whole result NaN.

    Parameters
    ----------
    values : array-like of numbers
        The sample; NaN entries are ignored.
    confidence : float, default 0.95
        Two-sided confidence level of the interval.

    Returns
    -------
    float
        Half-width of the confidence interval, or NaN when fewer than two
        non-NaN observations are available.
    """
    values = np.asarray(values, dtype=float)
    # Keep only real observations so missing values do not poison std/n.
    values = values[~np.isnan(values)]
    n = values.size
    if n <= 1:
        # A sample standard deviation needs at least two observations.
        return np.nan
    stderr = np.std(values, ddof=1) / np.sqrt(n)
    # Two-sided critical value: (1 + confidence) / 2 is the upper-tail quantile.
    t_crit = t.ppf((1 + confidence) / 2, df=n - 1)
    return t_crit * stderr

# group by year and country
# Per (year, country): the mean ai_share and the value returned by
# calculate_standard_error for the same column, with the grouping keys turned
# back into ordinary columns and the columns put in their final order.
final_df = (
    fin_d
    .groupby(["year", "country"])
    .agg(
        country_probability=pd.NamedAgg(column="ai_share", aggfunc="mean"),
        std_error=pd.NamedAgg(column="ai_share", aggfunc=calculate_standard_error),
    )
    .reset_index()
    .loc[:, ["country", "year", "std_error", "country_probability"]]
)

print(final_df.head())


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Re-apply the shared notebook styling before drawing ---
plt.rcParams.update(custom_style)

# --- Load USA data from Excel ---
# The first 4 rows of the "timeseries" sheet are skipped and six columns are
# picked by position; the names assigned below are this notebook's labels for
# them (NOTE(review): confirm the positions against the workbook layout).
usa_df = pd.read_excel(
    os.path.join(path_to_data, "US_time_series.xlsx"),
    sheet_name="timeseries",
    skiprows=4,
)
usa_df = usa_df.iloc[:, [1, 5, 6, 10, 15, 16]]
usa_df.columns = ["ai_share_coef", "ai_share_lower", "ai_share_upper",
                  "time_yrs_coef", "time_yrs_lower", "time_yrs_upper"]
# Coerce every cell to a number and drop rows with any missing value.
usa_df = usa_df.apply(pd.to_numeric, errors='coerce').dropna()
# Distance from the point estimate to the upper bound.
usa_df["ai_share_err"] = usa_df["ai_share_upper"] - usa_df["ai_share_coef"]

# --- Country-level smoothed plot ---
window_size = 2  # rolling-mean window, in rows of a country's series
# `plt.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` is the supported call with the same (name, lut) arguments.
colors = plt.get_cmap('tab10', 10)

for idx, country in enumerate(final_df['country'].unique()):
    country_data = final_df[final_df['country'] == country]
    # Centered rolling mean; min_periods=1 keeps the edge points instead of NaN.
    smoothed_y_values = country_data['country_probability'].rolling(
        window=window_size, center=True, min_periods=1
    ).mean()

    yerr_values = country_data['std_error']
    # Cycle through the palette so that countries beyond the number of
    # available colours do not all collapse onto the last colour.
    color = colors(idx % colors.N)

    # Smoothed line (carries the legend label).
    plt.plot(
        country_data['year'],
        smoothed_y_values,
        label=country.title(),
        color=color,
    )

    # Point markers with error bars, drawn at the smoothed values.
    plt.errorbar(
        country_data['year'],
        smoothed_y_values,
        yerr=yerr_values,
        fmt='o',
        capsize=5,
        color=color
    )

# Keep only the USA rows whose time value is later than 2019.3.
usa_df_filtered = usa_df[usa_df["time_yrs_coef"] > 2019.3]

# --- Add USA line plot (thick, grey, no points) ---
plt.plot(
    usa_df_filtered["time_yrs_coef"],
    usa_df_filtered["ai_share_coef"],
    label="United States",
    color="grey",
    linewidth=3,
)

# --- Shaded band between the USA lower and upper bounds ---
plt.fill_between(
    usa_df_filtered["time_yrs_coef"],
    usa_df_filtered["ai_share_lower"],
    usa_df_filtered["ai_share_upper"],
    color="grey",
    alpha=0.3,
)

# --- Styling ---
plt.xlabel('Year', fontsize=28)
plt.ylabel('Corrected share of AI-generated code', fontsize=27)
ax = plt.gca()
# Light horizontal grid lines only; vertical grid lines are switched off.
ax.grid(axis='y', color='gray', linestyle='--', linewidth=0.5, alpha=0.3)
ax.xaxis.grid(False)

plt.xlim(left=2019.01)

# Legend
plt.legend(fontsize=18)

# Final layout
plt.tight_layout()

# Uncomment to export the figure (path is relative to the working directory):
# plt.savefig("country_adoption.pdf", format="pdf", bbox_inches='tight')
plt.show()
