In [12]:
import os
import numpy as np
import pandas as pd
from faker import Faker

# ✅ Ensure we are at the project root so relative paths such as "data/processed"
# resolve consistently.
# Step up only when the *current* directory is the notebooks folder itself. A plain
# substring test on the full path would also fire when an ancestor directory merely
# contains "notebooks" in its name, moving the kernel out of the project root (and
# one level further on every re-run of this cell).
if os.path.basename(os.getcwd()).lower() == "notebooks":
    os.chdir("..")
print("📁 Current working directory:", os.getcwd())

# Faker instance; not used by the cells visible here, kept available for later cells.
fake = Faker()
# Seed NumPy's legacy global RNG so the synthetic data below is reproducible.
np.random.seed(42)


📁 Current working directory: c:\Documents\plpppp\ai-marriage-likelihood


In [13]:
# Generate synthetic "couple relationship" dataset

num_samples = 300

# Re-seed inside this cell (same seed as the setup cell) so that re-running the cell
# on its own always regenerates exactly the same data instead of continuing from
# wherever the global RNG stream happens to be.
np.random.seed(42)

# Order matters: the columns are drawn one after another from the same RNG stream,
# so reordering this list changes the generated values.
feature_columns = [
    "communication_score",
    "financial_stability",
    "shared_beliefs",
    "emotional_support",
    "conflict_resolution",
    "romantic_initiatives",
    "cultural_compatibility",
    "family_acceptance",
    "career_alignment",
]

# Every feature is an independent score drawn uniformly from [0, 10).
data = {
    column: np.random.uniform(0, 10, num_samples)
    for column in feature_columns
}

df = pd.DataFrame(data)

# Introduce a target variable: marriage likelihood (1 = likely to marry, 0 = unlikely).
# The label is a deterministic rule: a weighted average of six of the features
# (weights sum to 1, so the score stays on the 0-10 scale) compared to a threshold.
# conflict_resolution, romantic_initiatives and career_alignment carry no weight,
# i.e. they are pure noise features with respect to the label.
likelihood_weights = {
    "communication_score": 0.25,
    "emotional_support": 0.20,
    "shared_beliefs": 0.15,
    "financial_stability": 0.15,
    "family_acceptance": 0.15,
    "cultural_compatibility": 0.10,
}
likelihood_threshold = 6
assert abs(sum(likelihood_weights.values()) - 1.0) < 1e-9, "weights must sum to 1"

# Terms are accumulated left to right in the order listed above.
weighted_score = sum(
    df[column] * weight for column, weight in likelihood_weights.items()
)
df["marriage_likelihood"] = (weighted_score > likelihood_threshold).astype(int)

df.head()


Unnamed: 0,communication_score,financial_stability,shared_beliefs,emotional_support,conflict_resolution,romantic_initiatives,cultural_compatibility,family_acceptance,career_alignment,marriage_likelihood
0,3.745401,0.516817,1.689351,2.078863,7.582632,5.190818,9.565014,9.150901,3.406042,0
1,9.507143,5.313546,2.785903,0.265322,0.245869,4.791819,7.375084,5.330289,0.717119,0
2,7.319939,5.406351,1.770105,1.814354,0.221236,0.256421,3.532514,1.579548,4.096289,0
3,5.986585,6.374299,0.887025,5.830416,3.236102,3.412478,2.965356,6.958991,3.112175,0
4,1.560186,7.260913,1.206359,4.214246,4.886432,3.801956,3.497032,7.932614,6.771206,0


In [14]:
# Save dataset to CSV
output_path = "data/processed/marriage_data.csv"

# ✅ Ensure the destination folder exists before saving. It is derived from
# output_path so the folder and the file path cannot drift apart.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# index=False: the row index is a plain 0..n-1 range and carries no information.
df.to_csv(output_path, index=False)

print(f"✅ Synthetic dataset saved successfully at: {output_path}")


✅ Synthetic dataset saved successfully at: data/processed/marriage_data.csv


In [15]:
# Verify data saved correctly: reload the file written by the save cell and compare
# it with the in-memory frame rather than only eyeballing the shape.
df_loaded = pd.read_csv(output_path)
print(f"Loaded {df_loaded.shape[0]} rows and {df_loaded.shape[1]} columns")

# Round-trip check. Floats are compared with pandas' default tolerance, and
# check_dtype=False because the integer label may come back with a different
# integer width than it was created with (platform-dependent default int).
pd.testing.assert_frame_equal(df_loaded, df, check_dtype=False)

df_loaded.head()

Loaded 300 rows and 10 columns


Unnamed: 0,communication_score,financial_stability,shared_beliefs,emotional_support,conflict_resolution,romantic_initiatives,cultural_compatibility,family_acceptance,career_alignment,marriage_likelihood
0,3.745401,0.516817,1.689351,2.078863,7.582632,5.190818,9.565014,9.150901,3.406042,0
1,9.507143,5.313546,2.785903,0.265322,0.245869,4.791819,7.375084,5.330289,0.717119,0
2,7.319939,5.406351,1.770105,1.814354,0.221236,0.256421,3.532514,1.579548,4.096289,0
3,5.986585,6.374299,0.887025,5.830416,3.236102,3.412478,2.965356,6.958991,3.112175,0
4,1.560186,7.260913,1.206359,4.214246,4.886432,3.801956,3.497032,7.932614,6.771206,0


## Summary

In this notebook, we **generated a synthetic dataset** representing couple relationship dynamics.  
The data simulates real-world factors that may influence marriage likelihood, such as:

- Communication quality and conflict resolution  
- Financial stability and career alignment  
- Cultural compatibility and shared beliefs  
- Family acceptance  
- Emotional support and romantic initiatives  

The dataset was then saved to `data/processed/marriage_data.csv` for further analysis.

> ⚠️ Note: While this project explores social prediction models, it is **purely educational**.  
> Human relationships are complex and ultimately determined by free will and divine providence — not by algorithms.
