In [None]:
# Mount Google Drive into the Colab runtime; every path used later in this
# notebook lives under '/content/drive/My Drive/...'.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import the necessary libraries

In [None]:
import pandas as pd
import numpy as np
import pickle
import re
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Apply gender model

In [None]:
# Locations of the saved model, tokenizer and label encoder on the mounted Drive
MODEL_PATH = '/content/drive/My Drive/Thesis/model/blstm_gender_model.h5'
TOKENIZER_PATH = '/content/drive/My Drive/Thesis/model/tokenizer.pickle'
ENCODER_PATH = '/content/drive/My Drive/Thesis/model/label_encoder.pickle'

# Restore the Keras model first, then the two pickled preprocessing objects.
# NOTE(review): pickle.load can execute code embedded in the file, so these
# pickles must only ever be artifacts produced by this project.
model = load_model(MODEL_PATH)
with open(TOKENIZER_PATH, 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)
with open(ENCODER_PATH, 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)



In [None]:
# Preprocessing and Prediction Functions
def preprocess_names(names, maxlen=15):
    """Encode name strings as fixed-width integer sequences.

    Args:
        names: Strings to encode; passed straight to the notebook-level
            ``tokenizer.texts_to_sequences``.
        maxlen: Width of every output sequence. Shorter sequences are padded
            at the end (``padding='post'``); longer ones are cut using
            ``pad_sequences``' default truncation setting.

    Returns:
        The array produced by ``pad_sequences`` for the encoded names.
    """
    encoded = tokenizer.texts_to_sequences(names)
    padded = pad_sequences(encoded, maxlen=maxlen, padding='post')
    return padded

def predict_gender(names):
    """Predict a gender label for each entry of ``names``.

    Args:
        names: Any iterable of names (list, pandas Series, generator, ...).
            Every element is converted with ``str()``, so a missing value
            such as NaN is scored as the literal text "nan"; callers that
            care should filter missing entries out beforehand.

    Returns:
        A 1-D array with one label per input name, as decoded by the
        notebook-level ``label_encoder``. An empty input yields an empty
        array without invoking the model.
    """
    names = [str(name) for name in names]
    if not names:
        # Keras' predict() errors out on a zero-row batch, so short-circuit.
        return np.array([], dtype=object)

    processed = preprocess_names(names)
    predictions = model.predict(processed)
    # Pick the highest-scoring output unit for each row and map it back to
    # its label (assumes one output unit per class -- TODO confirm).
    predicted_labels = np.argmax(predictions, axis=1)
    return label_encoder.inverse_transform(predicted_labels)


In [None]:
# Load Processed Name Data
input_csv = '/content/drive/My Drive/Thesis/processed_names.csv'
df = pd.read_csv(input_csv)

# Apply Gender Prediction
# read_csv parses blank cells as NaN and predict_gender str()-casts every
# input, so a missing first name would otherwise be scored as the literal
# text "nan". Predict only for rows that carry a name; the rest stay empty.
has_name = df['first_name'].notna()
df['predicted_gender'] = pd.Series(index=df.index, dtype=object)
if has_name.any():
    df.loc[has_name, 'predicted_gender'] = predict_gender(df.loc[has_name, 'first_name'])

n_missing = int((~has_name).sum())
if n_missing:
    print(f"Skipped {n_missing} row(s) with no first_name; predicted_gender left empty")

# Group and Summarize by Year and Journal
# One row per (year, journal), one count column per predicted label.
# groupby drops rows whose key is missing, so rows without a prediction
# are not counted here.
summary = df.groupby(['year', 'journal', 'predicted_gender']).size().unstack(fill_value=0).reset_index()

# Export Results to CSV
output_full = '/content/drive/My Drive/Thesis/processed_names_with_gender.csv'
df.to_csv(output_full, index=False)

output_summary = '/content/drive/My Drive/Thesis/gender_summary_by_year_journal.csv'
summary.to_csv(output_summary, index=False)

print(f"Saved detailed predictions to: {output_full}")
print(f"Saved summary statistics to: {output_summary}")


[1m1541/1541[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step
Saved detailed predictions to: /content/drive/My Drive/Thesis/processed_names_with_gender.csv
Saved summary statistics to: /content/drive/My Drive/Thesis/gender_summary_by_year_journal.csv
