# Final Project
#### This notebook simulates the behaviour of a carwash
#### given a set of interarrival time and service time
#### we will find the statistic metrics, the distribution and analyze the results.

In [187]:
from numpy import mean
from scipy import stats
import numpy as np
from random import expovariate
import matplotlib.pyplot as plt

# Load the data:

In [293]:
dataset = np.loadtxt("data.txt", delimiter=',')
arrives = dataset[:,0]
services = dataset[:,1]
it = np.array([i for i in range(len(arrives))])

# Find p value for arrives and services:

In [303]:
Slope, Intercept, RValue, PValue, StdErr = stats.linregress(dataset)

# Auto correlation

In [305]:
if(arrivesPValue>0.05):
    print("The resulting p value for arrives is: {} and its greater than 0.05.\n".format(arrivesPValue))
    r = (np.correlate(it-it.mean(),it-it.mean(), mode='full'))/(it.var()*len(it))
    arrivesAutoCor = r[r.size//2:]
    print("Autocorrelation for arrives is: ", arrivesAutoCor)
else:
    print("P value for arrives is {}, the condition is not met.".format(arrivesPValue))

The resulting p value for arrives is: 0.30076797128732774 and its greater than 0.05.

Autocorrelation for arrives is:  [ 1.          0.9         0.80044494  0.70177976  0.60444939  0.50889878
  0.41557286  0.32491657  0.23737486  0.15339266  0.07341491 -0.00211346
 -0.0727475  -0.13804227 -0.19755284 -0.25083426 -0.2974416  -0.33692992
 -0.36885428 -0.39276974 -0.40823137 -0.41479422 -0.41201335 -0.39944383
 -0.37664071 -0.34315907 -0.29855395 -0.24238042 -0.17419355 -0.09354839]


In [306]:
#Set the distributions list
distributions = [
    "gamma",
    "expon",
    "lognorm",
    "weibull_min",  
    "weibull_max",
    "norm"
]

# Find distribution

## for arrives

In [307]:
arrivesDistributionResults = []
for distName in distributions:
    dist = getattr(stats, distName)
    fittedData = dist.fit(arrives)
    D, p = stats.kstest(arrives, distName, args=fittedData);
    arrivesDistributionResults.append((distName,p))
auxP = 0
arrivesAuxDist = ""
print("P Values for each distribution:\n")
for distribution in arrivesDistributionResults:
    print("{}: {}".format(distribution[0], distribution[1]))
    if(auxP<distribution[1]):
        auxP = distribution[1]
        arrivesAuxDist = distribution[0]
print("\nThe correct distribution for the dataset is {0} as it has the highest p value ({1}).".format(arrivesAuxDist,auxP))

P Values for each distribution:

gamma: 0.017726561468500712
expon: 0.8575377339716195
lognorm: 0.8205905276555825
weibull_min: 0.001076672930113265
weibull_max: 2.220446049250313e-16
norm: 0.39714998744337104

The correct distribution for the dataset is expon as it has the highest p value (0.8575377339716195).


## for services

In [308]:
servicesDistributionResults = []
for distName in distributions:
    dist = getattr(stats, distName)
    fittedData = dist.fit(services)
    D, p = stats.kstest(services, distName, args=fittedData);
    servicesDistributionResults.append((distName,p))
auxP = 0
servicesAuxDist = ""
print("P Values for each distribution:\n")
for distribution in servicesDistributionResults:
    print("{}: {}".format(distribution[0], distribution[1]))
    if(auxP<distribution[1]):
        auxP = distribution[1]
        servicesAuxDist = distribution[0]
print("\nThe correct distribution for the dataset is {0} as it has the highest p value ({1}).".format(servicesAuxDist,auxP))

P Values for each distribution:

gamma: 0.992276174713438
expon: 0.08448985297392575
lognorm: 1.4741541320972829e-12
weibull_min: 1.5543122344752192e-15
weibull_max: 0.0
norm: 0.9945787734080286

The correct distribution for the dataset is norm as it has the highest p value (0.9945787734080286).


In [266]:
#This block has the function that is intended to calculate the arrival and service time in a random way
#based on the amount of minutes of work in a day.

##It returns two lists: arrival times and services times.

def generateArrivalAndServiceTimes():
    workingDay = 8*60 #8 hours expressed in minutes.
    arrivals = [] #This will contain the arrival times for n works in a day.
    services = [] #This will contain the service times for n works in a day.
    totalAmountOfHours = 0
    while totalAmountOfHours < workingDay:
        nArrivalTime = expovariate(0.5)
        nServiceTime = expovariate(0.6)
        totalAmountOfHours += nArrivalTime
        arrivals.append(nArrivalTime)
        services.append(nServiceTime)
    return arrivals, services