## Imports

In [1]:
import numpy as np
import pandas as pd
import random as rand

## User defined functions

In [2]:
class AnimalPopulation:
    def __init__(self):
        self.create_population()

    def _generate_age(self, species):
        ''' Generates an age of the animal from a distribution depending on the 
        species.'''
        if species == 'cat':
            return rand.gammavariate(1.03, 4)
        elif species == 'dog':
            return rand.gammavariate(1.02, 3)
        elif species == 'rabbit':
            return rand.gammavariate(1.03, 2)
        elif species == 'bird':
            return rand.gammavariate(1.06, 7)
        elif species == 'snake':
            return rand.lognormvariate(0, 1.8)
        else:
            return None
        
    def _generate_gender(self, species):
        ''' Generates the gender of the animal. 
        All species but birds have a 45%/45%/10% chance of being male, 
        female or no gender recorded respectively. Birds have a 5%/5%/90% 
        chance of being male, female or no gender recorded respectively.'''
        if species == 'bird':
            return rand.choices(['male', 'female', pd.NA], weights=[0.05, 0.05, 0.9], k=1)[0]
        else:
            return rand.choices(['male', 'female', pd.NA], weights=[0.45, 0.45, 0.1], k=1)[0]

    def create_population(
            self,
            n=100
            ):
        ''' 
        Create an animal population with pre-defined characteristics.

            Parameters
            ----------
                n : int, default=50
                    Number of animals in dataset
                
        '''
        self.animals_df = pd.DataFrame(
            data={'species': rand.choices(['cat', 'dog', 'rabbit', 'bird', 'snake'], weights=[0.33, 0.40, 0.15, 0.09, 0.03], k=n)}
            )
        # self._add_gender()
        self.animals_df['gender'] = self.animals_df['species'].apply(self._generate_gender)
        self.animals_df['age'] = self.animals_df['species'].apply(self._generate_age)
        


## Experimentation

In [3]:
animals_df = AnimalPopulation().animals_df
animals_df.head(15)

Unnamed: 0,species,gender,age
0,rabbit,female,2.732951
1,cat,female,1.56329
2,dog,female,6.824987
3,dog,male,9.182624
4,cat,male,0.083914
5,dog,male,3.722339
6,dog,female,0.957178
7,dog,female,0.076339
8,dog,male,0.324758
9,rabbit,male,4.86693


## Testing probability functions

In [4]:
def gamma_distribution(k, theta):
    mean=k*theta
    if k<1:
        mode=0
    else:
        mode=(k-1)*theta
    variance=k*theta**2
    skewness=2/np.sqrt(k)
    print(f'For k={k} and theta={theta}:\nMean={round(mean, 2)},  Mode={round(mode, 2)},  Variance = {round(variance, 2)},  Skewness={round(skewness, 2)}\n')

def lognorm_distribution(mu, sigma):
    mean=np.exp(mu+(sigma**2/2))
    mode=np.exp(mu)
    variance=(np.exp(sigma**2)-1)*np.exp(2*mu+sigma**2)
    skewness=(np.exp(sigma**2)+2)*np.sqrt(np.exp(sigma**2)-1)
    print(f'For mu={mu} and sigma={sigma}:\nMean={round(mean, 2)},  Mode={round(mode, 2)},  Variance = {round(variance, 2)},  Skewness={round(skewness, 2)}\n')

In [5]:
print('Cat:')
gamma_distribution(1.03, 4)
print('Dog:')
gamma_distribution(1.02, 3)
print('Rabbit:')
gamma_distribution(1.03, 2)
print('Bird')
gamma_distribution(1.06, 7)
print('Snake')
lognorm_distribution(0, 1.8)

Cat:
For k=1.03 and theta=4:
Mean=4.12,  Mode=0.12,  Variance = 16.48,  Skewness=1.97

Dog:
For k=1.02 and theta=3:
Mean=3.06,  Mode=0.06,  Variance = 9.18,  Skewness=1.98

Rabbit:
For k=1.03 and theta=2:
Mean=2.06,  Mode=0.06,  Variance = 4.12,  Skewness=1.97

Bird
For k=1.06 and theta=7:
Mean=7.42,  Mode=0.42,  Variance = 51.94,  Skewness=1.94

Snake
For mu=0 and sigma=1.8:
Mean=5.05,  Mode=1.0,  Variance = 626.44,  Skewness=136.38

