# Inputs & outputs
- **Inputs:** `final_data/raw_data_encrypted_final.csv.zip` for global adoption and `final_data/full_countries.csv` for country-level breakdowns.
- **Outputs:** the overall diffusion curve of the AI code share, and the smoothed country-level trajectories used in the main figure.

In [ ]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# 1) Recreating the diffusion curves from the full-user raw data

In [None]:


# Shared Matplotlib look for every figure in this notebook. The mapping is
# pushed into plt.rcParams below, so it affects all plots drawn afterwards.
custom_style = {
    # --- text sizes for axis labels, titles and tick labels ---
    'axes.labelsize': 25,
    'axes.titlesize': 20,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,

    # --- default line, marker and error-bar appearance ---
    'lines.linewidth': 3,
    'lines.markersize': 8,
    'lines.color': 'black',
    'errorbar.capsize': 5,

    # --- axes frame ---
    'axes.edgecolor': 'black',
    'axes.linewidth': 2,

    # --- tick marks ---
    'xtick.color': 'black',
    'ytick.color': 'black',
    'xtick.major.width': 1.2,
    'ytick.major.width': 1.2,

    # --- background grid (enabled by default, dashed grey) ---
    'axes.grid': True,
    'grid.color': 'gray',
    'grid.linewidth': 0.7,
    'grid.linestyle': '--',
    'grid.alpha': 0.6,

    # --- figure canvas ---
    'figure.figsize': (12, 8),
    'figure.dpi': 300,
    'figure.facecolor': 'white',
}

# Apply the style globally for the rest of the session.
plt.rcParams.update(custom_style)

In [None]:

# Directory holding the input data files read by this notebook.
path_to_data = "./final_data"

In [None]:
# Load the zipped raw data CSV from the data directory; pandas infers the
# compression from the ".zip" suffix.
raw_data_file = os.path.join(path_to_data, "raw_data_encrypted_final.csv.zip")
df = pd.read_csv(raw_data_file)
# Show the first rows as the cell output.
df.head()

In [None]:
# Preview the first five rows of the raw data.
df.head(n=5)

In [None]:


# Parse the date column; values that cannot be parsed become NaT rather than
# raising an error.
df['date'] = pd.to_datetime(df['date'], errors='coerce')

# Label every row with the quarter its date belongs to.
df['quarter'] = df['date'].dt.to_period('Q')

# Average AI share within each quarter (missing shares are skipped by mean()).
mean_per_quarter_df = df['ai_share'].groupby(df['quarter']).mean()

# Draw the quarterly averages in chronological order as a marked line.
quarterly_means = mean_per_quarter_df.sort_index()
quarterly_means.plot.line(
    marker='o',
    xlabel='Quarter',
    ylabel='Mean AI Share',
    title='Mean AI Share per Quarter (Dataset df)',
)

plt.show()

# 2) Country-level diffusion

In [None]:
# Load the country-level table from the same data directory as the raw data,
# so that changing `path_to_data` relocates both inputs consistently.
full_df = pd.read_csv(os.path.join(path_to_data, "full_countries.csv"))
# Show the first rows as the cell output.
full_df.head()

In [None]:


# Plot one smoothed trajectory per country, with error bars, on a shared axis.

# Number of consecutive observations averaged by the centered rolling mean.
window_size = 2
# `plt.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` returns the same 10-colour "tab10" palette on old and new
# versions alike.
colors = plt.get_cmap('tab10')

# sort=False keeps countries in order of first appearance (the same order
# `unique()` gives), and groupby skips rows whose country is missing instead
# of failing later on `.title()`.
for idx, (country, group) in enumerate(full_df.groupby('country', sort=False)):
    country_data = group.sort_values('year')

    # Centered rolling mean; min_periods=1 keeps the edge points instead of
    # turning them into NaN.
    smoothed_y = (
        country_data['country_probability']
        .rolling(window=window_size, center=True, min_periods=1)
        .mean()
    )

    # Wrap around the palette so that countries beyond the tenth cycle through
    # the colours rather than all receiving the last one.
    color = colors(idx % colors.N)

    plt.plot(
        country_data['year'],
        smoothed_y,
        label=country.title(),
        color=color,
    )

    # NOTE(review): the error bars use the unsmoothed `std_error` column around
    # the smoothed values - confirm this is the intended uncertainty.
    plt.errorbar(
        country_data['year'],
        smoothed_y,
        yerr=country_data['std_error'],
        fmt='o',
        capsize=5,
        color=color,
    )

plt.xlabel('Year', fontsize=28)
plt.ylabel('Corrected share of AI-generated code', fontsize=27)

# Keep only faint horizontal grid lines; the global style enables both axes.
ax = plt.gca()
ax.grid(axis='y', color='gray', linestyle='--', linewidth=0.5, alpha=0.3)
ax.xaxis.grid(False)

# Start the x-axis just after 2019.
plt.xlim(left=2019.01)
plt.legend(fontsize=18)
plt.tight_layout()
plt.show()