# Create the table in pgAdmin


-- Student roster table. Column names line up with the headers of the
-- students.csv file generated further down in this notebook.
CREATE TABLE students (
    student_id  SERIAL,            -- auto-incrementing integer key
    first_name  TEXT NOT NULL,
    last_name   TEXT NOT NULL,
    birth_date  DATE NOT NULL,
    gpa         DOUBLE PRECISION,  -- nullable
    is_active   BOOLEAN,           -- nullable
    notes       TEXT NOT NULL,
    CONSTRAINT students_pkey PRIMARY KEY (student_id)
);

In [3]:
import pandas as pd

# Fictional student roster, written one record per student.
# student_id is the primary key (optional in the CSV if PostgreSQL uses SERIAL).
student_columns = [
    "student_id",
    "first_name",
    "last_name",
    "birth_date",
    "gpa",
    "is_active",
    "notes",
]
student_records = [
    (1, "Alice", "Smith", "2001-01-15", 3.8, True, "Excellent student 😊"),
    (2, "Bob", "Johnson", "2002-02-20", 3.5, False, "Needs improvement 😕"),
    (3, "Charlie", "Williams", "2000-05-30", 3.9, True, "Top of the class 🏆"),
    (4, "Diana", "Brown", "2001-12-10", 4.0, True, "Good progress 👍"),
    (
        5, "Eve", "Jones", "1999-07-25", 3.2, False,
        "Special character test: ñ, ü, å, é, 漢字, 한글, عربى",
    ),
]

# Pivot the row-wise records into the column -> values mapping pandas consumes.
data = {
    column: list(values)
    for column, values in zip(student_columns, zip(*student_records))
}

# Build the frame from the column mapping.
df = pd.DataFrame(data)

# Spell booleans the way PostgreSQL prints them: "t" for true, "f" for false.
df["is_active"] = ["t" if active else "f" for active in df["is_active"]]

# Persist as UTF-8 so the emoji and non-Latin notes survive; no index column.
df.to_csv("students.csv", encoding="utf-8", index=False)

print("CSV file 'students.csv' created successfully.")

CSV file 'students.csv' created successfully.


# One row of mock data with the correct columns

In [1]:
import pandas as pd

# Build a single-row mock listing and write it to CSV.
#
# Every key in `data` becomes one CSV column; every value is a one-element
# list, so the resulting DataFrame has exactly one row.
data = {
    "id": [1],
    "listing_url": ["https://www.airbnb.com/rooms/1"],
    "scrape_id": [20241231],
    "last_scraped": ["2024-12-21"],
    "source": ["airbnb"],  # category
    "name": ["Cozy Apartment in the City Center"],
    "description": ["A lovely, bright apartment near public transport."],
    "neighborhood_overview": ["Close to shops and nightlife."],
    "picture_url": ["https://a0.muscache.com/pictures/abc123.jpg"],
    "host_id": [123456],
    "host_url": ["https://www.airbnb.com/users/show/123456"],
    "host_name": ["Jane"],
    "host_since": ["2019-03-15"],
    "host_location": ["New York, USA"],
    "host_about": ["I love hosting people from all over the world!"],
    "host_response_time": ["within an hour"],
    "host_response_rate": [98.0],
    "host_acceptance_rate": [95.0],
    "host_is_superhost": ["t"],
    "host_thumbnail_url": ["https://a0.muscache.com/pictures/user/thumb123.jpg"],
    "host_picture_url": ["https://a0.muscache.com/pictures/user/pic123.jpg"],
    "host_neighbourhood": ["Midtown"],
    "host_listings_count": [2],
    "host_total_listings_count": [2],
    # The single cell holds a Python list (note the nested brackets).
    "host_verifications": [["email", "phone"]],
    "host_has_profile_pic": ["t"],
    "host_identity_verified": ["t"],
    "neighbourhood": ["Manhattan"],
    "neighbourhood_cleansed": ["Manhattan"],  # category
    "latitude": [40.7128],
    "longitude": [-74.0060],
    "property_type": ["Apartment"],  # category
    "room_type": ["Entire home/apt"],  # category
    "accommodates": [2],
    "bathrooms": [1.0],
    "bathrooms_text": ["1 bath"],
    "bedrooms": [1],
    "beds": [1],
    # The single cell holds a Python list (note the nested brackets).
    "amenities": [["Wifi", "Kitchen", "Heating"]],
    "price": [150.0],
    "minimum_nights": [2],
    "maximum_nights": [30],
    "minimum_minimum_nights": [1],
    "maximum_minimum_nights": [7],
    "minimum_maximum_nights": [30],
    "maximum_maximum_nights": [1125],
    "minimum_nights_avg_ntm": [2.0],
    "maximum_nights_avg_ntm": [30.0],
    "has_availability": ["t"],
    "availability_30": [15],
    "availability_60": [28],
    "availability_90": [60],
    "availability_365": [300],
    "calendar_last_scraped": ["2024-12-20"],
    "number_of_reviews": [10],
    "number_of_reviews_ltm": [5],
    "number_of_reviews_l30d": [1],
    "first_review": ["2023-01-15"],
    "last_review": ["2024-12-01"],
    "review_scores_rating": [98.0],
    "review_scores_accuracy": [10.0],
    "review_scores_cleanliness": [10.0],
    "review_scores_checkin": [9.0],
    "review_scores_communication": [10.0],
    "review_scores_location": [9.0],
    "review_scores_value": [9.0],
    "instant_bookable": ["f"],
    "calculated_host_listings_count": [2],
    "calculated_host_listings_count_entire_homes": [2],
    "calculated_host_listings_count_private_rooms": [0],
    "calculated_host_listings_count_shared_rooms": [0],
    "reviews_per_month": [1.2],
    "bathroom_type": ["private bath"]  # category
}

# Create the DataFrame
df = pd.DataFrame(data)

# (Optional) Convert some columns to categorical to match your original schema
categorical_cols = [
    "source",
    "neighbourhood_cleansed",
    "property_type",
    "room_type",
    "bathroom_type",
]
# One astype call with a column -> dtype mapping converts all of them at once.
df = df.astype({col: "category" for col in categorical_cols})

# Single source of truth for the output file name, so the success message
# below cannot drift from the file that is actually written.
output_path = "1row_mock_data.csv"

# Write DataFrame to CSV with one row of mock data (UTF-8, no index column)
df.to_csv(output_path, index=False, encoding="utf-8")

print(f"Mock data CSV file '{output_path}' created successfully!")

Mock data CSV file 'mock_data.csv' created successfully!
