In [15]:
import numpy as np
import random
import statistics
from math import sqrt
from scipy.stats import t
from scipy.stats import norm

In [1]:
bedrooms = [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6]
prices = [120, 133, 139, 185, 148, 160, 192, 205, 244, 213, 236, 280, 275, 273, 312, 311, 304, 415, 396, 488]

# create a dictionary to store the mean and median of prices for each bedroom count
bedroom_stats = {}

# loop through the unique bedroom counts in the dataset
for num_bedrooms in set(bedrooms):
    # filter the prices for the current number of bedrooms
    bedroom_prices = [prices[i] for i in range(len(bedrooms)) if bedrooms[i] == num_bedrooms]
    # compute the mean and median of the prices
    mean_price = sum(bedroom_prices) / len(bedroom_prices)
    median_price = sorted(bedroom_prices)[len(bedroom_prices) // 2]
    # store the mean and median in the dictionary
    bedroom_stats[num_bedrooms] = {"mean": mean_price, "median": median_price}

# print the results
for num_bedrooms in bedroom_stats:
    print(f"Bedrooms: {num_bedrooms}\tMean price: {bedroom_stats[num_bedrooms]['mean']:.2f}\tMedian price: {bedroom_stats[num_bedrooms]['median']}")


Bedrooms: 1	Mean price: 130.67	Median price: 133
Bedrooms: 2	Mean price: 171.25	Median price: 185
Bedrooms: 3	Mean price: 246.57	Median price: 244
Bedrooms: 4	Mean price: 309.00	Median price: 311
Bedrooms: 5	Mean price: 405.50	Median price: 415
Bedrooms: 6	Mean price: 488.00	Median price: 488


In [9]:
# extract a random sample of size 10
random.seed(42)  # set the random seed for reproducibility
sample = random.sample(bedrooms, 10)

# compute the sample mean and standard deviation
sample_mean = statistics.mean(sample)
sample_stdev = statistics.stdev(sample)

# compute the population mean
population_mean = statistics.mean(bedrooms)

# compute the 95% confidence interval using the sample
alpha = 0.05
degrees_freedom = len(sample) - 1
t_critical = t.ppf(1 - alpha/2, degrees_freedom)
margin_of_error = t_critical * sample_stdev / sqrt(len(sample))
lower_bound = sample_mean - margin_of_error
upper_bound = sample_mean + margin_of_error

# check if the population mean lies between the upper and lower bounds of the interval
if lower_bound <= population_mean <= upper_bound:
    print("The population mean lies within the confidence interval.")
else:
    print("The population mean does not lie within the confidence interval.")

# print the results
print(f"Sample: {sample}")
print(f"Sample mean: {sample_mean:.2f}")
print(f"Sample standard deviation: {sample_stdev:.2f}")
print(f"Population mean: {population_mean:.2f}")
print(f"Confidence interval: [{lower_bound:.2f}, {upper_bound:.2f}]")

###  Extracts a random sample of size 10 from the Bedrooms data points

In [10]:
bedrooms = [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6]

random_sample = random.sample(bedrooms, k=10)

print(random_sample)

[1, 5, 3, 1, 1, 4, 2, 5, 3, 3]


### Compute the mean and standard deviation of the sample

In [5]:
bedrooms = [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6]
mean = statistics.mean(bedrooms)
print(f"The mean of the population is: {mean:.2f}")

The mean of the population is: 3.00


### Compute the the mean of the population

In [7]:
prices = [120, 133, 139, 185, 148, 160, 192, 205, 244, 213, 236, 280, 275, 273, 312, 311, 304, 415, 396, 488]

mean = statistics.mean(prices)

print(f"The mean of the population is: {mean:.2f}")

The mean of the population is: 251.45


### Computer the confidence interval of 95% using the sample 

In [11]:
prices = [120, 133, 139, 185, 148, 160, 192, 205, 244, 213, 236, 280, 275, 273, 312, 311, 304, 415, 396, 488]

sample = np.random.choice(prices, size=10)

sample_mean = np.mean(sample)
sample_std = np.std(sample, ddof=1)
n = len(sample)
t_value = t.ppf(0.975, n - 1)

margin_of_error = t_value * sample_std / np.sqrt(n)
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

print(f"The 95% confidence interval is: ({confidence_interval[0]:.2f}, {confidence_interval[1]:.2f})")


The 95% confidence interval is: (196.13, 360.67)


### Check if the population mean lies between the upper and lower bounds of the interval
Cl=x ̅  ±z.a/√n

In [14]:
prices = [120, 133, 139, 185, 148, 160, 192, 205, 244, 213, 236, 280, 275, 273, 312, 311, 304, 415, 396, 488]

sample = np.random.choice(prices, size=10)

sample_mean = np.mean(sample)
sample_std = np.std(sample, ddof=1)
n = len(sample)
z_critical = norm.ppf(0.975)

margin_of_error = z_critical * sample_std / np.sqrt(n)
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

population_mean = np.mean(prices)

if confidence_interval[0] <= population_mean <= confidence_interval[1]:
    print("The population mean is within the confidence interval.")
else:
    print("The population mean is outside the confidence interval.")


The population mean is within the confidence interval.
