In [None]:
import torch
# CUDA sanity check: print, one value per line, whether torch can use CUDA,
# how many devices it sees, the name of device 0 (or a fallback message when
# CUDA is unavailable), and the value of torch.version.cuda.
print(torch.cuda.is_available())
print(torch.cuda.device_count())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU detected")
print(torch.version.cuda)

True
1
NVIDIA GeForce GTX 1060 with Max-Q Design
12.4


In [2]:
import random
import numpy as np
import pandas as pd
from faker import Faker
import ollama
import torch

# Initialize Faker
# NOTE(review): `fake` is not referenced by any other line visible in this
# notebook excerpt — confirm a later cell uses it before keeping the dependency.
fake = Faker()

# Number of synthetic rows to generate (one LLM call is made per row).
num_rows = 100

# Pools of categorical values; each generated row samples one entry per pool.
exchange_locations = [
    "France",
    "Germany",
    "Hungary",
    "Turkey",
    "Italy",
    "Spain",
    "Netherlands",
]
housing_methods = [
    "Rental Website",
    "University Website",
    "CROUS (Student Housing)",
    "Roommate Website",
    "Real Estate Agency",
]
housing_satisfaction = [
    "YES",
    "NO",
]

def generate_housing_difficulty(satisfaction: str, model: str = 'llama3') -> str:
    """Ask an Ollama chat model for a short first-person housing testimonial.

    Despite the name, the generated text is not always a difficulty: a
    satisfied student gets a prompt asking why they are happy, everyone else
    gets a prompt asking for a specific housing problem.

    Args:
        satisfaction: "YES" selects the satisfied-student prompt; any other
            value (the caller passes "NO") selects the struggling-student
            prompt. The comparison is exact and case-sensitive.
        model: Name of the Ollama model to query. Defaults to 'llama3',
            the model this function always used before the parameter existed.

    Returns:
        The text content of the model's reply message.

    Raises:
        Whatever `ollama.chat` raises (e.g. when the Ollama server is not
        reachable or the model is not available); nothing is caught here.
    """
    if satisfaction == "YES":
        prompt = (
            "You are a student who is satisfied with your housing situation. "
            "Briefly describe why you are happy, mentioning aspects like affordability, location, or amenities. "
            "Keep it concise (2 sentences max)."
        )
    else:
        prompt = (
            "You are a student struggling to find or secure housing. "
            "Briefly describe a specific issue you faced, such as being scammed, misleading accommodation descriptions, or unresponsive landlords. "
            "Keep it concise (2 sentences max)."
        )
    # No `options` are sent: "use_gpu" is not among Ollama's documented model
    # options (GPU offload is tuned with `num_gpu`), so the flag that used to
    # be passed here had no effect and only suggested control that did not exist.
    response = ollama.chat(model=model, messages=[{"role": "user", "content": prompt}])
    return response['message']['content']

# Pick the torch device: CUDA when torch reports it as available, CPU otherwise.
# This only selects a device object; it does not fail when no GPU is present.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Generate synthetic data
# Each entry of `data` is one row: a list of values in the same order as the
# DataFrame columns declared in the `finally` block below.
data = []

try:
    for _ in range(num_rows):
        student_status = "YES"  # constant for every generated row
        # Age drawn from a normal(25, 5), truncated to int and clamped to [18, 40]
        age = max(18, min(40, int(np.random.normal(25, 5))))
        exchange_semester = random.choice(["YES", "NO"])
        exchange_location = random.choice(exchange_locations)
        exchange_year = random.randint(2015, 2025)
        housing_method = random.choice(housing_methods)
        housing_budget = random.randint(200, 800)  # Budget between 200-800 EUR
        # Cost lies within 100 below the budget and never exceeds it
        housing_cost = random.randint(housing_budget - 100, housing_budget)
        housing_satisfaction_value = random.choice(housing_satisfaction)
        # One LLM call per row; the prompt depends on the sampled satisfaction
        housing_difficulty = generate_housing_difficulty(housing_satisfaction_value)

        data.append([
            student_status, age, exchange_semester, exchange_location, exchange_year,
            housing_method, housing_difficulty, housing_budget, housing_cost, housing_satisfaction_value
        ])
except KeyboardInterrupt:
    # Stopping the cell by hand is supported: the rows generated so far are
    # kept. Report it so a shorter-than-requested dataset is not a surprise.
    print(f"Interrupted: keeping {len(data)} of {num_rows} requested rows.")
finally:
    # Runs on normal completion, on interrupt, and while any other exception
    # (e.g. one raised by the LLM call) propagates, so partial results survive.
    # Create DataFrame
    df_synthetic = pd.DataFrame(data, columns=[
        "Student_Status", "Age", "Exchange_Semester", "Exchange_Location", "Exchange_Year",
        "Housing_Finding_Method", "Housing_Difficulties", "Housing_Budget", "Housing_Cost", "Housing_Satisfaction"
    ])
    if data:
        # Save DataFrame to a file (e.g., CSV)
        df_synthetic.to_csv('synthetic_data.csv', index=False)
    else:
        # Nothing was generated (e.g. the very first LLM call failed): do not
        # replace a CSV from an earlier run with a header-only file.
        print("No rows generated; 'synthetic_data.csv' was not written.")

# Display the first few rows
df_synthetic.head()


Unnamed: 0,Student_Status,Age,Exchange_Semester,Exchange_Location,Exchange_Year,Housing_Finding_Method,Housing_Difficulties,Housing_Budget,Housing_Cost,Housing_Satisfaction
0,YES,26,YES,Italy,2025,University Website,I'm thrilled with my current housing arrangeme...,564,505,YES
1,YES,22,NO,Turkey,2020,Rental Website,I recently fell victim to a scam when I tried ...,268,234,NO
2,YES,26,NO,Germany,2016,Real Estate Agency,I recently fell victim to a rental scam where ...,300,246,NO
3,YES,20,NO,Spain,2024,University Website,I'm thrilled with my current housing situation...,511,458,YES
4,YES,22,YES,France,2017,University Website,I'm thrilled to be living in a cozy off-campus...,501,496,YES
