In [38]:
import pandas as pd
from scipy import stats

# Create a DataFrame from the provided data
data = {
    'Smart Thermostat': [50, 80, 60, 30, 20],
    'Smart Light': [70, 100, 90, 50, 50]
}
index = ['Very Satisfied', 'Satisfied', 'Neutral', 'Unsatisfied', 'Very Unsatisfied']
df = pd.DataFrame(data, index=index)

# Perform chi-square test
chi2, p_value, dof, expected = stats.chi2_contingency(df)

print(f"Chi-square statistic: {chi2}")
print(f"P-value: {p_value}")

# Interpretation
alpha = 0.05  # Significance level
if p_value < alpha:
    print("There is a significant association between device type and customer satisfaction.")
else:
    print("There is no significant association between device type and customer satisfaction.")


Chi-square statistic: 5.638227513227513
P-value: 0.22784371130697179
There is no significant association between device type and customer satisfaction.


In [39]:
#1. State the Hypotheses:

# Null Hypothesis (H0): There is no association between device type and customer satisfaction.
# Alternative Hypothesis (H1): There is a significant association between device type and customer satisfaction.


In [40]:
#2. Compute the Chi-Square Statistic:

import pandas as pd
from scipy import stats

# Sample data (replace with your actual data)
data = {
    'Smart Thermostat': [50, 80, 60, 30, 20],
    'Smart Light': [70, 100, 90, 50, 50]
}
index = ['Very Satisfied', 'Satisfied', 'Neutral', 'Unsatisfied', 'Very Unsatisfied']
df = pd.DataFrame(data, index=index)

# Create lists for the device type and satisfaction level
device_type = ['Smart Thermostat'] * len(data['Smart Thermostat']) + ['Smart Light'] * len(data['Smart Light'])
satisfaction = index * 2 # Repeat the index twice, once for each device type

# Create a new DataFrame for the contingency table
df_contingency = pd.DataFrame({'device_type': device_type, 'satisfaction': satisfaction})

# Create a contingency table
contingency_table = pd.crosstab(df_contingency['satisfaction'], df_contingency['device_type'])

# Perform chi-square test
chi2, p_value, dof, expected = stats.chi2_contingency(contingency_table)

print(f"Chi-square statistic: {chi2}")

Chi-square statistic: 0.0


In [41]:
#3. Determine the Critical Value:
# Using the significance level (alpha) of 0.05 and the degrees of freedom (which is the number of categories minus 1)

from scipy.stats import chi2

alpha = 0.05
dof = contingency_table.shape[0] - 1  # Degrees of freedom

critical_value = chi2.ppf(1 - alpha, dof)
print(f"Critical Value: {critical_value}")


Critical Value: 9.487729036781154


In [42]:
# Compare the Chi-Square statistic with the critical value to decide whether to reject the null hypothesis.

# Calculate the chi-squared statistic using the contingency table
chi2_statistic, _, _, _ = stats.chi2_contingency(contingency_table)

if chi2_statistic > critical_value:
    print("Reject the null hypothesis.")
    print("There is a significant association between device type and customer satisfaction.")
else:
    print("Fail to reject the null hypothesis.")
    print("There is no significant association between device type and customer satisfaction.")

Fail to reject the null hypothesis.
There is no significant association between device type and customer satisfaction.


In [43]:
#Include all calculations, the Chi-Square statistic, the critical value, and your conclusion.

import pandas as pd
from scipy import stats
from scipy.stats import chi2

# Sample data (replace with your actual data)
data = {
    'Device': ['Thermostat', 'Thermostat', 'Light', 'Light', 'Thermostat', 'Light', 'Thermostat', 'Light'],
    'Satisfaction': ['High', 'Low', 'High', 'Low', 'High', 'Low', 'Low', 'High']
}
df = pd.DataFrame(data)

# Create a contingency table
contingency_table = pd.crosstab(df['Device'], df['Satisfaction'])
print("Contingency Table:\n", contingency_table)

# Perform chi-square test
chi2_statistic, p_value, dof, expected = stats.chi2_contingency(contingency_table)

print(f"\nChi-square statistic: {chi2_statistic}")
print(f"P-value: {p_value}")
print(f"Degrees of freedom: {dof}")
print(f"Expected frequencies:\n{expected}")

# Interpretation
alpha = 0.05  # Significance level
if p_value < alpha:
    print("\nThere is a significant association between device type and customer satisfaction.")
else:
    print("\nThere is no significant association between device type and customer satisfaction.")

# Calculate the critical value
alpha = 0.05
dof = contingency_table.shape[0] - 1  # Degrees of freedom
critical_value = chi2.ppf(1 - alpha, dof)
print(f"\nCritical Value: {critical_value}")

# Compare chi-square statistic with critical value
if chi2_statistic > critical_value:
    print("\nReject the null hypothesis.")
    print("There is a significant association between device type and customer satisfaction.")
else:
    print("\nFail to reject the null hypothesis.")
    print("There is no significant association between device type and customer satisfaction.")


Contingency Table:
 Satisfaction  High  Low
Device                 
Light            2    2
Thermostat       2    2

Chi-square statistic: 0.0
P-value: 1.0
Degrees of freedom: 1
Expected frequencies:
[[2. 2.]
 [2. 2.]]

There is no significant association between device type and customer satisfaction.

Critical Value: 3.841458820694124

Fail to reject the null hypothesis.
There is no significant association between device type and customer satisfaction.


In [44]:

#Bombay hospitality Ltd. operates a franchise model for producing exotic Norwegian dinners throughout New England.The operating cost for a franchise in a week (W) is given by the equation W = $1,000 + $5X, where X represents the number of units produced in a week.Recent feedback from restaurant owners suggests that this cost model may no longer be accurate,as their observed weekly operating costs

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# Sample data (replace with your actual data)
data = {
    'UnitsProduced': [100, 150, 200, 250, 300, 350, 400],
    'OperatingCost': [1500, 2000, 2500, 3000, 3500, 4000, 4500]
}
df = pd.DataFrame(data)

# Define the model
model = smf.ols('OperatingCost ~ UnitsProduced', data=df)

# Fit the model
results = model.fit()

# Print the summary of the model
print(results.summary())

# Check if the coefficient for UnitsProduced is significantly different from 5
if results.pvalues['UnitsProduced'] < 0.05 and results.params['UnitsProduced'] != 5:
    print("The cost model may no longer be accurate.")
else:
    print("The cost model appears to be accurate.")



                            OLS Regression Results                            
Dep. Variable:          OperatingCost   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 3.385e+32
Date:                Tue, 17 Sep 2024   Prob (F-statistic):           9.00e-81
Time:                        18:18:40   Log-Likelihood:                 198.24
No. Observations:                   7   AIC:                            -392.5
Df Residuals:                       5   BIC:                            -392.6
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept       500.0000   1.46e-13   3.42e+15

  warn("omni_normtest is not valid with less than 8 observations; %i "


In [45]:

import numpy as np
from scipy.stats import t

# Given data
theoretical_cost_intercept = 1000
theoretical_cost_slope = 5
sample_mean_cost = 3050
sample_size = 25
population_mean_units = 600
population_std_units = 25

# Calculate the expected cost based on the theoretical model
expected_cost = theoretical_cost_intercept + theoretical_cost_slope * population_mean_units

# Calculate the standard error of the mean
standard_error = population_std_units / np.sqrt(sample_size)

# Calculate the t-statistic
t_statistic = (sample_mean_cost - expected_cost) / standard_error

# Calculate the degrees of freedom
degrees_of_freedom = sample_size - 1

# Calculate the p-value
p_value = 2 * (1 - t.cdf(abs(t_statistic), df=degrees_of_freedom))

print(f"T-statistic: {t_statistic}")
print(f"P-value: {p_value}")

# Interpretation
alpha = 0.05  # Significance level
if p_value < alpha:
    print("The sample mean cost is significantly different from the expected cost based on the theoretical model.")
else:
    print("There is no significant difference between the sample mean cost and the expected cost based on the theoretical model.")


T-statistic: -190.0
P-value: 0.0
The sample mean cost is significantly different from the expected cost based on the theoretical model.


In [46]:
#1. State the Hypotheses statement:

# Null Hypothesis (H0): The sample mean cost is not significantly different from the expected cost based on the theoretical model.
# Alternative Hypothesis (H1): The sample mean cost is significantly different from the expected cost based on the theoretical model.


In [47]:
# Use the following formula to calculate the test statistic (t):
# where:
# •	ˉxˉ = sample mean weekly cost (Rs. 3,050)
# •	μ = theoretical mean weekly cost according to the cost model (W = $1,000 + $5X for X = 600 units)
# •	σ = 5*25 units
# •	n = sample size (25 restaurants)

import numpy as np

# Given data
sample_mean_cost = 3050
population_mean_units = 600
population_std_units = 5 * 25  # σ = 5 * 25 units
sample_size = 25
theoretical_cost_intercept = 1000
theoretical_cost_slope = 5

# Calculate the theoretical mean weekly cost
expected_cost = theoretical_cost_intercept + theoretical_cost_slope * population_mean_units

# Calculate the standard error of the mean
standard_error = population_std_units / np.sqrt(sample_size)

# Calculate the t-statistic
t_statistic = (sample_mean_cost - expected_cost) / standard_error

print(f"T-statistic: {t_statistic}")


T-statistic: -38.0


In [48]:
# 3. Determine the Critical Value:
# Using the alpha level of 5% (α = 0.05), determine the critical value from the standard normal (Z) distribution table.

from scipy.stats import norm

alpha = 0.05
critical_value = norm.ppf(1 - alpha / 2)  # Two-tailed test
print(f"Critical Value: {critical_value}")


Critical Value: 1.959963984540054


In [49]:

# Compare the test statistic with the critical value to decide whether to reject the null hypothesis.

if abs(t_statistic) > critical_value:
    print("Reject the null hypothesis.")
    print("The sample mean cost is significantly different from the expected cost based on the theoretical model.")
else:
    print("Fail to reject the null hypothesis.")
    print("There is no significant difference between the sample mean cost and the expected cost based on the theoretical model.")


Reject the null hypothesis.
The sample mean cost is significantly different from the expected cost based on the theoretical model.
