In [None]:
import numpy as np

# import pandas for data analysis and manipulation
import pandas as pd

#matplotlib library for plotting 2D graphics
import matplotlib.pyplot as plt

In [9]:
# Load the dataset from the SAS file into a DataFrame and display it.

df = pd.read_sas("bs04retail.sas7bdat")
df

In [10]:
# Inspect the dtype pandas assigned to each column.
df.dtypes

In [11]:
# Cast the 'Store' column to int, then preview the first rows.
# NOTE(review): astype(int) is expected to fail if 'Store' holds missing
# values -- confirm the column is complete before relying on this cell.
df['Store'] = df['Store'].astype(int)
df.head()

In [12]:
#df.isnull()


# Building the Hypothesis Testing system

In [6]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind, ttest_1samp

# Define the Hypothesis class
class Hypothesis:
    """Run t-tests on columns of a DataFrame and describe the outcome in text.

    Both test methods return the 5-tuple
    ``(result, t_stat, pvalue, mean_difference, correlation)`` on success and
    a single error-message string (starting with ``"Error:"``) on failure, so
    callers should check the return type before unpacking it.
    """

    def __init__(self, df):
        """Keep a reference to the DataFrame whose columns will be tested."""
        self.df = df

    def one_tailed_test(self, item, alpha=0.05, popmean=0.0):
        """One-sample, right-tailed t-test of H0: mean(item) <= popmean.

        Args:
            item: Name of the column to test.
            alpha: Significance level used as the rejection threshold.
            popmean: Hypothesised population mean the sample is compared to.

        Returns:
            ``(result, t_stat, pvalue, None, None)`` where ``pvalue`` is the
            right-tailed p-value, or an error-message string if the column is
            missing or the test could not be computed.
        """
        try:
            df_item = np.array(self.df[item])
            # These two fields only exist for two-sample comparisons.
            mean_difference = None
            correlation = None

            # The sample is compared against the hypothesised mean; alpha is
            # only the decision threshold and must not be used as that mean.
            t_stat, two_sided_pvalue = ttest_1samp(df_item, popmean)

            # ttest_1samp reports a two-sided p-value. Fold it into the
            # right-tailed p-value so it matches the one-tailed hypothesis.
            if t_stat > 0:
                pvalue = two_sided_pvalue / 2
            else:
                pvalue = 1 - two_sided_pvalue / 2

            if pvalue <= alpha and t_stat > 0:
                result = f"\n since p_value = {pvalue:.9f} <= {alpha} and {t_stat:.4f} > 0, we Reject the null hypothesis. There is a significant positive difference in {item}."
            else:
                result = f"\n since p_value = {pvalue:.9f} > {alpha}, there's no sufficient evidence to reject the null hypothesis H0. There is no significant difference in {item}."

            return result, t_stat, pvalue, mean_difference, correlation

        except KeyError as e:
            return f"Error: The item {e} not found in the DataFrame. Please enter a valid item name."

        except Exception as e:
            return f"Error: An unexpected error occurred. Details: {str(e)}"

    def two_tailed_test(self, item1, item2, alpha=0.05):
        """Two-sided Welch t-test of H0: mean(item1) == mean(item2).

        Args:
            item1: Name of the first column.
            item2: Name of the second column.
            alpha: Significance level used as the rejection threshold.

        Returns:
            ``(result, t_stat, pvalue, mean_difference, correlation)`` where
            ``mean_difference`` is ``mean(item1) - mean(item2)`` and
            ``correlation`` is the Pearson correlation of the two columns, or
            an error-message string if a column is missing or the test could
            not be computed.
        """
        try:
            df_item1 = np.array(self.df[item1])
            df_item2 = np.array(self.df[item2])

            # equal_var=False selects Welch's t-test (no equal-variance assumption).
            t_stat, pvalue = ttest_ind(df_item1, df_item2, equal_var=False)
            mean_difference = df_item1.mean() - df_item2.mean()
            correlation = np.corrcoef(df_item1, df_item2)[0, 1]

            if pvalue <= alpha:
                result = f"\nsince p_value = {pvalue:.4f} <= {alpha}, we Reject the null hypothesis. There is a significant difference between {item1} and {item2}."
            else:
                result = f"\nsince p_value = {pvalue:.4f} > {alpha}, we cannot reject the null hypothesis H0. There is no significant difference between {item1} and {item2}. hence, the data does not provide enough evidence to support the claim that there is a significant difference between the two items"

            return result, t_stat, pvalue, mean_difference, correlation

        except KeyError as e:
            return f"Error: One or both items {e} not found in the DataFrame. Please enter valid item names."

        except Exception as e:
            return f"Error: An unexpected error occurred. Details: {str(e)}"

        
        
      



In [None]:
def main(data):
    """Run one interactive hypothesis test on ``data`` and print a report.

    Prompts (via ``input``) for the test type, the column name(s) and the
    significance level, runs the matching ``Hypothesis`` test, prints the
    verdict with the t-statistic and p-value, and finally prints a table of
    descriptive statistics for the numeric columns of ``data``.

    Args:
        data: pandas DataFrame whose columns are the testable items.

    Returns:
        None after a completed run. An error-message string when the input
        was invalid (unknown test type, fewer than two items for a
        two-tailed test, non-numeric alpha) or when the test itself reported
        an error such as an unknown column name.
    """
    try:
        # available items for testing
        print("\nAvailable items for testing are:", ", ".join(data.columns))

        # Instantiating the Hypothesis class
        tester = Hypothesis(data)

        # available test types for user to choose
        print("\nAvailable test types are: one-tailed, two-tailed")

        # Choose type of test (one-tailed or two-tailed)
        test_type = input("\nwhat type of test do you want to perform ?: ").strip().lower()

        items = None
        if test_type == 'one-tailed':
            # User: Choose item to test
            item = input("\nplease enter the name of the item/product you want to test: ").strip()

            # User input: Choose significance level (alpha)
            alpha = float(input("\nplease Enter significance levelfor your test (alpha): "))

            outcome = tester.one_tailed_test(item, alpha)

        elif test_type == 'two-tailed':
            # User input: Choose items to test. Strip each name so that
            # "a, b" resolves to the columns "a" and "b".
            raw_items = input("\nplease enter the names of the items you want to test (comma-separated): ")
            items = [name.strip() for name in raw_items.split(',')]
            if len(items) < 2:
                return "Error:invalid items name or index error, please enter the correct item/product names."

            # User input: Choose significance level (alpha)
            alpha = float(input("\nEnter significance level (alpha): "))

            outcome = tester.two_tailed_test(items[0], items[1], alpha)

        else:
            # Handle the unknown test type explicitly instead of relying on
            # an UnboundLocalError further down.
            result = "Error: Invalid test type. Please choose either 'one-tailed' or 'two-tailed'."
            print(result)
            return result

        # The tester reports failures as a plain string; hand that message
        # back instead of unpacking its characters into five variables.
        if not isinstance(outcome, tuple):
            return outcome

        result, t_stat, pvalue, mean_difference, correlation = outcome

        # Print the t-test results
        print(result)

        # Print additional details if available (two-sample tests only).
        if mean_difference is not None and correlation is not None:
            print(f"\nmean Difference between {items[0]} and {items[1]}: {mean_difference:.2f}")
            print(f"\nthe corelation between {items[0]} and {items[1]}: {correlation:.3f}")
            # Only claim significant evidence when H0 was actually rejected.
            if pvalue <= alpha:
                if mean_difference > 0:
                    print(f"\nthe data constitute significant evidence that the underlying mean No was greater for {items[0]}, by an estimated value of {mean_difference:.3f}. the result suggest that {items[0]} should be preferred")
                else:
                    print(f"\nthe data constitute significant evidence that the underlying mean No was greater for {items[1]}, by an estimated value of {abs(mean_difference):.3f}. the result suggest that {items[1]} should be preferred")

        # Print the t-statistic and p-value
        print(f"\nt-statistic: {t_stat:.4f}")
        print(f"\npvalue: {pvalue:.9f}")

        # Descriptive statistics per numeric column. Non-numeric columns are
        # skipped because mean/median/std/var are undefined for them. The
        # rows are collected first and the frame is built once.
        summary_columns = ['Item', 'N', 'Mean', 'Meadian', 'std_deviation', 'Variance', 'min_val', 'max_val']
        records = []
        for column in data.select_dtypes(include='number').columns:
            values = data[column]
            records.append({
                'Item': column,
                'N': len(values),
                'Mean': round(values.mean(), 3),
                'Meadian': round(values.median(), 2),
                'std_deviation': round(values.std(), 2),
                'Variance': round(values.var(), 2),
                'min_val': values.min(),
                'max_val': values.max(),
            })
        stat_table = pd.DataFrame(records, columns=summary_columns)

        print("\nstatistic Table:")
        print(stat_table)

    except ValueError:
        return f"ValueError: please enter numeric value and not character."

    except Exception as error:
        # Catch Exception rather than everything so Ctrl-C / kernel interrupt
        # still stops the prompt, and show what actually went wrong.
        print(f"Some other exception happened: {error}")

In [None]:


def hypothesis_System_interface(data):
    """Interactive loop that runs hypothesis tests on ``data`` until the user quits.

    Each round prompts (via ``input``) for the test type, the column name(s)
    and the significance level, runs the matching ``Hypothesis`` test, prints
    the verdict with the t-statistic and p-value, prints a table of
    descriptive statistics for the numeric columns, and asks whether to
    continue. Invalid input prints an error message and starts a new round.

    Args:
        data: pandas DataFrame whose columns are the testable items.

    Returns:
        None.
    """
    tester = Hypothesis(data)

    # The table depends only on `data`, so it is built once, outside the loop.
    # Non-numeric columns are skipped because mean/median/std/var are
    # undefined for them.
    summary_columns = ['Item', 'N', 'Mean', 'Median', 'Std_Deviation', 'Variance', 'Min_Val', 'Max_Val']
    records = []
    for column in data.select_dtypes(include='number').columns:
        values = data[column]
        records.append({
            'Item': column,
            'N': len(values),
            'Mean': round(values.mean(), 3),
            'Median': round(values.median(), 2),
            'Std_Deviation': round(values.std(), 2),
            'Variance': round(values.var(), 2),
            'Min_Val': values.min(),
            'Max_Val': values.max(),
        })
    stat_table = pd.DataFrame(records, columns=summary_columns)

    while True:
        try:
            print("\nAvailable items for testing are:", ", ".join(data.columns))

            print("\nAvailable test types are: one-tailed, two-tailed")
            test_type = input("\nWhat type of test do you want to perform? (Enter 'one-tailed', 'two-tailed', or 'quit'): ").strip().lower()

            if test_type in ('quit', 'q'):
                print("Exiting the application. Goodbye!")
                break

            items = None
            if test_type == 'one-tailed':
                item = input("\nPlease enter the name of the item/product you want to test: ").strip()
                alpha = float(input("\nPlease enter the significance level for your test (alpha): "))
                outcome = tester.one_tailed_test(item, alpha)
            elif test_type == 'two-tailed':
                # Strip each name so that "a, b" resolves to columns "a" and "b".
                raw_items = input("\nPlease enter the names of the items you want to test (comma-separated): ")
                items = [name.strip() for name in raw_items.split(',')]
                if len(items) < 2:
                    print("Error: Invalid item name or index error. Please enter the correct item/product names.")
                    continue
                alpha = float(input("\nEnter significance level (alpha): "))
                outcome = tester.two_tailed_test(items[0], items[1], alpha)
            else:
                # Start a new round right away so statistics left over from
                # an earlier round are never printed for an invalid choice.
                print("Error: Invalid test type. Please choose either 'one-tailed' or 'two-tailed'.")
                continue

            # The tester reports failures as a plain string; show it instead
            # of unpacking its characters into five variables.
            if not isinstance(outcome, tuple):
                if outcome is not None:
                    print(outcome)
                continue

            result, t_stat, pvalue, mean_difference, correlation = outcome
            print(result)

            # Extra details exist only for two-sample tests.
            if mean_difference is not None and correlation is not None:
                print(f"\nMean Difference between {items[0]} and {items[1]}: {mean_difference:.2f}")
                print(f"Correlation between {items[0]} and {items[1]}: {correlation:.3f}")
                # Only claim significant evidence when H0 was actually rejected.
                if pvalue <= alpha:
                    if mean_difference > 0:
                        print(f"The data constitutes significant evidence that the underlying mean number was greater for {items[0]}, by an estimated value of {mean_difference:.3f}. The result suggests that {items[0]} should be preferred.")
                    else:
                        print(f"The data constitutes significant evidence that the underlying mean number was greater for {items[1]}, by an estimated value of {abs(mean_difference):.3f}. The result suggests that {items[1]} should be preferred.")

            print(f"\nT-statistic: {t_stat:.4f}")
            print(f"P-value: {pvalue:.9f}")

            print("\nStatistic Table:")
            print(stat_table)

            more_operations = input("\nDo you want to perform more operations? (Enter 'yes' to continue or 'no' to quit): ").strip().lower()
            if more_operations not in ('yes', 'y'):
                print("Exiting the application. Goodbye!")
                break

        except ValueError:
            print("ValueError: Please enter a numeric value and not a character.")
        except Exception as e:
            print(f"An error occurred: {e}")

 


In [None]:
# Start the interactive session on the loaded DataFrame; it keeps prompting
# through input() until the user chooses to quit.
hypothesis_System_interface(df)