# Housing Analysis using NumPy

In [6]:
# Loading the package 
import numpy as np 

### Loading the housing dataset from CSV file

In [8]:
# Parse the comma-separated housing file into a NumPy array, skipping the
# single header row (skip_header is a line count).
# NOTE(review): genfromtxt's default dtype is float, so any non-numeric
# column would presumably load as NaN -- confirm against the CSV contents.
data = np.genfromtxt(
    'Housing_new.csv',
    delimiter=',',
    skip_header=1,
)

In [9]:
# Sanity check: display the container type returned by np.genfromtxt.
type(data)

numpy.ndarray

### Extracting relevant columns (e.g. price, area,...)

In [12]:
# Split the 2-D array into one 1-D array per feature.
# Columns 0-3 are read positionally; parking is read from the LAST column.
# NOTE(review): assumes the CSV column order is price, area, bedrooms,
# bathrooms, ..., parking -- confirm against the file header.
price, area, bedrooms, bathrooms = (data[:, col] for col in range(4))
parking = data[:, -1]

### Basic descriptive statistics
* Mean, Median, SD, Min, Max

In [16]:
# Price: centre (mean, median), spread and range.
# Mean and standard deviation are rounded to whole units; np.std is called
# with its default ddof=0 (population standard deviation).
print(
    '...Price Summary...',
    f'Mean: {round(np.mean(price))}',
    f'Median: {np.median(price)}',
    f'Standard deviation: {round(np.std(price))}',
    f'Min: {np.min(price)}',
    f'Max: {np.max(price)}',
    sep='\n',
)

...Price Summary...
Mean: 4766729
Median: 4340000.0
Standard deviation: 1868723
Min: 1750000.0
Max: 13300000.0


In [18]:
# Area: same five descriptive statistics as for price.
# Mean and standard deviation are rounded to whole units; np.std is called
# with its default ddof=0 (population standard deviation).
print('\n'.join([
    '...Area Summary...',
    f'Mean: {round(np.mean(area))}',
    f'Median: {np.median(area)}',
    f'Standard deviation: {round(np.std(area))}',
    f'Min: {np.min(area)}',
    f'Max: {np.max(area)}',
]))

...Area Summary...
Mean: 5151
Median: 4600.0
Standard deviation: 2168
Min: 1650.0
Max: 16200.0


### Data filtering

In [19]:
# Baseline row count (one row per house) before any filter is applied.
print(f"Number of houses before filtering: {data.shape[0]}")


Number of houses before filtering: 545


In [20]:
# Keep the rows with more than 3 bedrooms AND more than 2 bathrooms
# (both comparisons are strict).
filtered_data = data[np.logical_and(bedrooms > 3, bathrooms > 2)]

In [25]:
# Rows that passed the bedrooms/bathrooms filter.
print(f"Number of houses after filtering: {filtered_data.shape[0]}")

Number of houses after filtering: 7


In [28]:
# Keep the rows that satisfy all three conditions:
#   - at least 3 bedrooms
#   - area strictly greater than 8000 square feet
#   - at least 2 parking spaces
data_filter = data[
    np.logical_and.reduce([bedrooms >= 3, area > 8000, parking >= 2])
]

In [29]:
# Rows that passed the area/bedrooms/parking filter.
print(f"Number of houses after filtering: {data_filter.shape[0]}")

Number of houses after filtering: 25


### Unique categories (distinct values)

In [31]:
# List the distinct values (sorted by np.unique) seen in each count column.
print(
    f"Unique bedroom counts: {np.unique(bedrooms)}",
    f"Unique bathroom counts: {np.unique(bathrooms)}",
    sep="\n",
)

Unique bedroom counts: [1. 2. 3. 4. 5. 6.]
Unique bathroom counts: [1. 2. 3. 4.]


### Correlation 

In [33]:
# Pairwise correlation coefficients between the columns of `data`.
# rowvar=False: each column is a variable and each row an observation.
# (The default, rowvar=True, would treat each row as a variable instead.)
corr_house = np.corrcoef(data, rowvar=False)
print("Correlation matrix: ", corr_house, sep="\n")

Correlation matrix: 
[[1.         0.53599735 0.36649403 0.51754534 0.38439365]
 [0.53599735 1.         0.15185849 0.19381953 0.35298048]
 [0.36649403 0.15185849 1.         0.37393024 0.1392699 ]
 [0.51754534 0.19381953 0.37393024 1.         0.17749582]
 [0.38439365 0.35298048 0.1392699  0.17749582 1.        ]]


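#### Correlation with price

Values are Pearson correlation coefficients (range −1 to 1), not percentages.

- Price and area: r ≈ 0.54 (strong)
- Price and bedrooms: r ≈ 0.37 (weak)
- Price and bathrooms: r ≈ 0.52 (strong)
- Price and parking: r ≈ 0.38 (weak)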