In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global generator up front so every draw below repeats run to run.
np.random.seed(42)
n = 300  # rows in the synthetic dataset

# (low, high) arguments handed to np.random.randint for each feature; the
# upper bound is exclusive. The order is significant: the seeded stream is
# consumed in exactly this sequence.
feature_ranges = [
    (1, 11),        # OverallQual: quality score 1-10
    (500, 4000),    # GrLivArea: sqft
    (0, 4),         # GarageCars: 0-3 cars
    (0, 2500),      # TotalBsmtSF: basement sqft
    (1900, 2020),   # YearBuilt: year built
    (2000, 15000),  # LotArea: lot size in sqft
]
OverallQual, GrLivArea, GarageCars, TotalBsmtSF, YearBuilt, LotArea = (
    np.random.randint(low, high, n) for low, high in feature_ranges
)

# Target: a fixed linear combination of the features plus Gaussian noise
# (mean 0, standard deviation 20000). The noise is drawn after all features.
noise = np.random.normal(0, 20000, n)
years_since_1900_term = 100 * (YearBuilt - 1900)
SalePrice = (
    5000 * OverallQual
    + 50 * GrLivArea
    + 10000 * GarageCars
    + 20 * TotalBsmtSF
    + years_since_1900_term
    + 0.5 * LotArea
    + noise
)

# Assemble the table: six feature columns followed by the target column.
column_names = [
    'OverallQual', 'GrLivArea', 'GarageCars',
    'TotalBsmtSF', 'YearBuilt', 'LotArea', 'SalePrice',
]
column_values = [
    OverallQual, GrLivArea, GarageCars,
    TotalBsmtSF, YearBuilt, LotArea, SalePrice,
]
df = pd.DataFrame(dict(zip(column_names, column_values)))

# Write the table to disk without the index column, then confirm and preview.
df.to_csv("housing_6_features.csv", index=False)

print("Dataset generated and saved as 'housing_6_features.csv'.")
print(df.head())


Dataset generated and saved as 'housing_6_features.csv'.
   OverallQual  GrLivArea  GarageCars  TotalBsmtSF  YearBuilt  LotArea  \
0            7       1560           0         1358       1939     3762   
1            4       2827           2         1019       1914    14112   
2            8       3920           0          541       1920     9101   
3            5       3044           0          758       1946     9674   
4            7        801           1         2128       1972    14652   

       SalePrice  
0  153314.466177  
1  202319.223753  
2  253945.396459  
3  227366.037252  
4  145957.981360  


In [None]:
# Print a structural summary of the frame: index range, per-column non-null
# counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   OverallQual  300 non-null    int32  
 1   GrLivArea    300 non-null    int32  
 2   GarageCars   300 non-null    int32  
 3   TotalBsmtSF  300 non-null    int32  
 4   YearBuilt    300 non-null    int32  
 5   LotArea      300 non-null    int32  
 6   SalePrice    300 non-null    float64
dtypes: float64(1), int32(6)
memory usage: 9.5 KB


: 