# Final Project
#### This notebook simulates the behaviour of a carwash
#### given a set of interarrival time and service time
#### we will find the statistic metrics, the distribution and analyze the results.

In [174]:
from numpy import mean
from scipy import stats
import numpy as np
from random import expovariate

# Load the data:

In [175]:
dataset = np.loadtxt("data.txt", delimiter=',')
arrives = dataset[:,0]
services = dataset[:,1]
it = np.array([i for i in range(len(arrives))])

# Find p value for arrives and services:

In [176]:
arrivesSlope, arrivesIntercept, arrivesRValue, arrivesPValue, arrivesStdErr = stats.linregress(it, arrives)
servicesSlope, servicesIntercept, servicesRValue, servicesPValue, servicesStdErr = stats.linregress(it, services)

# Auto correlation

## AC for arrives

In [177]:
if(arrivesPValue>0.05):
    print("The resulting p value for arrives is: {} and its greater than 0.05.\n".format(arrivesPValue))
    r = (np.correlate(it-it.mean(),it-it.mean(), mode='full'))/(it.var()*len(it))
    arrivesAutoCor = r[r.size//2:]
    print("Autocorrelation for arrives is: ", arrivesAutoCor)
else:
    print("P value for arrives is {}, the condition is not met.".format(arrivesPValue))

The resulting p value for arrives is: 0.531302876867114 and its greater than 0.05.

Autocorrelation for arrives is:  [ 1.          0.7         0.41212121  0.14848485 -0.07878788 -0.25757576
 -0.37575758 -0.42121212 -0.38181818 -0.24545455]


## AC for services

In [178]:
if(servicesPValue>0.05):
    print("The resulting p value for arrives is: {} and its greater than 0.05.\n".format(servicesPValue))
    r = (np.correlate(it-it.mean(),it-it.mean(), mode='full'))/(it.var()*len(it))
    servicesAutoCor = r[r.size//2:]
    print("Autocorrelation for arrives is: ", servicesAutoCor)
else:
    print("P value for arrives is {}, the condition is not met.".format(servicesPValue))

The resulting p value for arrives is: 0.05934418922780965 and its greater than 0.05.

Autocorrelation for arrives is:  [ 1.          0.7         0.41212121  0.14848485 -0.07878788 -0.25757576
 -0.37575758 -0.42121212 -0.38181818 -0.24545455]


In [179]:
#Set the distributions list
distributions = [
    "gamma",
    "expon",
    "lognorm",
    "weibull_min",  
    "weibull_max",
    "norm"
]

# Find distribution

## for arrives

In [185]:
arrivesDistributionResults = []
for distName in distributions:
    dist = getattr(stats, distName)
    fittedData = dist.fit(arrives)
    D, p = stats.kstest(arrives, distName, args=fittedData);
    arrivesDistributionResults.append((distName,p))
auxP = 0
arrivesAuxDist = ""
print("P Values for each distribution:\n")
for distribution in arrivesDistributionResults:
    print("{}: {}".format(distribution[0], distribution[1]))
    if(auxP<distribution[1]):
        auxP = distribution[1]
        arrivesAuxDist = distribution[0]
print("\nThe correct distribution for the dataset is {0} as it has the highest p value ({1}).".format(arrivesAuxDist,auxP))

P Values for each distribution:

gamma: 0.6274911732513193
expon: 0.9154078507629974
lognorm: 0.8886717507648648
weibull_min: 0.33910606708390123
weibull_max: 0.00017984630103362598
norm: 0.5035132633968178

The correct distribution for the dataset is expon as it has the highest p value (0.9154078507629974).


## for services

In [183]:
servicesDistributionResults = []
for distName in distributions:
    dist = getattr(stats, distName)
    fittedData = dist.fit(services)
    D, p = stats.kstest(services, distName, args=fittedData);
    servicesDistributionResults.append((distName,p))
auxP = 0
servicesAuxDist = ""
print("P Values for each distribution:\n")
for distribution in servicesDistributionResults:
    print("{}: {}".format(distribution[0], distribution[1]))
    if(auxP<distribution[1]):
        auxP = distribution[1]
        servicesAuxDist = distribution[0]
print("\nThe correct distribution for the dataset is {0} as it has the highest p value ({1}).".format(servicesAuxDist,auxP))

P Values for each distribution:

gamma: 0.0179151424898778
expon: 0.9516761764619848
lognorm: 0.03363207850499084
weibull_min: 9.725542832561551e-05
weibull_max: 9.269459789296164e-06
norm: 0.7344559764901557

The correct distribution for the dataset is expon as it has the highest p value (0.9516761764619848).


In [None]:
#This block has the function that is intended to calculate the arrival and service time in a random way
#based on the amount of minutes of work in a day.

##It returns two lists: arrival times and services times.

def generateArrivalAndServiceTimes():
    workingDay = 8*60 #8 hours expressed in minutes.
    arrivals = [] #This will contain the arrival times for n works in a day.
    services = [] #This will contain the service times for n works in a day.
    totalAmountOfHours = 0
    while totalAmountOfHours < workingDay:
        nArrivalTime = expovariate(0.5)
        nServiceTime = expovariate(0.6)
        totalAmountOfHours += nArrivalTime
        arrivals.append(nArrivalTime)
        services.append(nServiceTime)
    return arrivals, services