In [13]:
# %load "C:/Users/wangj337/Google Drive/Courses/IntroAI-CS50/2Uncertainty/heredity/heredity.py"
import csv
import itertools
import sys

# Fixed model parameters used by the probability computations below.
PROBS = {

    # Unconditional probability of carrying 2, 1 or 0 copies of the gene
    "gene": {2: 0.01, 1: 0.03, 0: 0.96},

    # Probability of showing (True) or not showing (False) the trait,
    # keyed first by the number of gene copies the person carries
    "trait": {
        2: {True: 0.65, False: 0.35},
        1: {True: 0.56, False: 0.44},
        0: {True: 0.01, False: 0.99},
    },

    # Mutation probability
    "mutation": 0.01,
}


def main():
    """
    Command-line entry point.

    Loads the family CSV named by the single command-line argument, adds up
    the joint probability of every gene/trait assignment that agrees with the
    recorded traits, normalizes the totals, and prints each person's gene and
    trait distributions. Exits with a usage message when the argument count
    is wrong.
    """

    # Exactly one argument (the CSV path) is expected
    if len(sys.argv) != 2:
        sys.exit("Usage: python heredity.py data.csv")
    people = load_data(sys.argv[1])

    # Running totals for each person; turned into distributions by normalize()
    probabilities = {}
    for person in people:
        probabilities[person] = {
            "gene": {2: 0, 1: 0, 0: 0},
            "trait": {True: 0, False: 0},
        }

    names = set(people)

    # Enumerate every candidate set of people who show the trait
    for have_trait in powerset(names):

        # A candidate agrees with the evidence when every person whose trait
        # is recorded is in `have_trait` exactly when that trait is True.
        # Candidates that disagree have probability 0 and are skipped.
        matches_evidence = all(
            people[person]["trait"] is None
            or people[person]["trait"] == (person in have_trait)
            for person in names
        )
        if matches_evidence:

            # Enumerate every split of people into one-copy / two-copy carriers.
            # `two_genes` is drawn only from people outside `one_gene`, so the
            # two sets never overlap.
            for one_gene in powerset(names):
                for two_genes in powerset(names - one_gene):
                    p = joint_probability(people, one_gene, two_genes, have_trait)
                    update(probabilities, one_gene, two_genes, have_trait, p)

    # Ensure probabilities sum to 1
    normalize(probabilities)

    # Report every distribution, four decimal places per value
    for person in people:
        print(f"{person}:")
        for field, distribution in probabilities[person].items():
            print(f"  {field.capitalize()}:")
            for value, p in distribution.items():
                print(f"    {value}: {p:.4f}")


def load_data(filename):
    """
    Load gene and trait data from a file into a dictionary.

    File assumed to be a CSV containing fields name, mother, father, trait.
    mother, father must both be blank, or both be valid names in the CSV.
    trait should be 0 or 1 if trait is known, blank otherwise.

    Returns a dict mapping each name to a dict with the keys "name",
    "mother", "father" (None when blank) and "trait" (True for "1",
    False for "0", None for anything else).
    """
    # Any trait text other than "1" or "0" means the trait is unknown
    trait_values = {"1": True, "0": False}

    data = dict()
    # newline="" is what the csv module expects of file objects; it lets the
    # reader handle line endings itself instead of having them translated.
    with open(filename, newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            name = row["name"]
            data[name] = {
                "name": name,
                "mother": row["mother"] or None,
                "father": row["father"] or None,
                "trait": trait_values.get(row["trait"])
            }
    return data


def powerset(s):
    """
    Return a list of all possible subsets of set s.

    Subsets are listed by increasing size, starting with the empty set;
    each subset appears exactly once.
    """
    elements = list(s)
    subsets = []
    for size in range(len(elements) + 1):
        for combo in itertools.combinations(elements, size):
            subsets.append(set(combo))
    return subsets


In [50]:
def joint_probability(people, one_gene, two_genes, have_trait):
    """
    Compute and return a joint probability.

    The probability returned is the probability that
        * everyone in set `one_gene` has one copy of the gene, and
        * everyone in set `two_genes` has two copies of the gene, and
        * everyone in neither `one_gene` nor `two_genes` has no copy, and
        * everyone in set `have_trait` has the trait, and
        * everyone not in set `have_trait` does not have the trait.

    `people` maps each name to a dict with at least the keys 'mother' and
    'father' (a parent's name, or a falsy value when not recorded).
    """
    p = 1

    for person in set(people):
        mother = people[person]['mother']
        father = people[person]['father']

        # Number of copies this scenario assigns to the person;
        # membership in `one_gene` is checked first.
        if person in one_gene:
            copies = 1
        elif person in two_genes:
            copies = 2
        else:
            copies = 0

        # Gene factor: inherited from the parents when both are recorded,
        # otherwise the unconditional gene probability.
        if mother and father:
            p *= compute_child(father, mother, copies, one_gene, two_genes)
        else:
            p *= PROBS["gene"][copies]

        # Trait factor: the membership test yields the True/False key directly
        p *= PROBS["trait"][copies][person in have_trait]

    return p
    
      
    
def compute_child(father, mother, n, one_gene, two_genes):
    """
    Compute and return the probability that a child has n copies of the gene,
    given the names of the child's father and mother.

    `one_gene` and `two_genes` are the sets of people assumed to carry one and
    two copies respectively; a parent in neither set is treated as carrying
    none. Any n other than 2 or 1 is handled as 0 copies.
    """
    mutation = PROBS["mutation"]

    def pass_probabilities(parent):
        # Return (P(parent passes the gene), P(parent does not pass it)).
        if parent in two_genes:
            # Passes the gene unless it mutates
            return 1 - mutation, mutation
        if parent in one_gene:
            # 0.5 * mutation + 0.5 * (1 - mutation) simplifies to 0.5
            return 0.5, 0.5
        # No copy to pass: the gene only appears through mutation
        return mutation, 1 - mutation

    father_passes, father_keeps = pass_probabilities(father)
    mother_passes, mother_keeps = pass_probabilities(mother)

    # Combine the two independent contributions
    if n == 2:
        return father_passes * mother_passes
    if n == 1:
        # Exactly one parent passes the gene
        return father_passes * mother_keeps + mother_passes * father_keeps
    return father_keeps * mother_keeps
                                                            

In [51]:
def update(probabilities, one_gene, two_genes, have_trait, p):
    """
    Add the joint probability `p` to the running totals in `probabilities`.

    For every person, `p` is added to exactly one "gene" entry (1 if the
    person is in `one_gene`, otherwise 2 if in `two_genes`, otherwise 0) and
    to exactly one "trait" entry (True if the person is in `have_trait`,
    otherwise False).

    `probabilities` is modified in place; nothing is returned.
    """
    for person, distributions in probabilities.items():
        # Gene part: membership in `one_gene` is checked first
        if person in one_gene:
            copies = 1
        elif person in two_genes:
            copies = 2
        else:
            copies = 0
        distributions["gene"][copies] += p

        # Trait part: the membership test yields the True/False key directly
        distributions["trait"][person in have_trait] += p
    

def normalize(probabilities):
    """
    Rescale every distribution in `probabilities` so that it sums to 1
    while keeping the relative proportions of its entries.

    Each person's "gene" distribution (keys 2, 1, 0) and "trait" distribution
    (keys True, False) is divided by its own total. `probabilities` is
    modified in place; nothing is returned.
    """
    for distributions in probabilities.values():
        gene = distributions["gene"]
        trait = distributions["trait"]

        # Gene part: total first, then scale each entry by it
        gene_sum = gene[1] + gene[2] + gene[0]
        for copies in (1, 2, 0):
            gene[copies] /= gene_sum

        # Trait part
        trait_sum = trait[True] + trait[False]
        for has_trait in (True, False):
            trait[has_trait] /= trait_sum

In [52]:
# Spot check: joint probability that Harry has one copy of the gene, James has
# two copies, Lily has none, and only James shows the trait.
# Relies on the global `people` created by the load_data("family0.csv") cell.
joint_probability(people, {"Harry"}, {"James"}, {"James"})

0.0026643247487999995

In [62]:
%run heredity.py "data/family0.csv"

Harry:
  Gene:
    2: 0.0092
    1: 0.4557
    0: 0.5351
  Trait:
    True: 0.2665
    False: 0.7335
James:
  Gene:
    2: 0.1976
    1: 0.5106
    0: 0.2918
  Trait:
    True: 1.0000
    False: 0.0000
Lily:
  Gene:
    2: 0.0036
    1: 0.0136
    0: 0.9827
  Trait:
    True: 0.0000
    False: 1.0000


In [63]:
%run heredity.py "data/family1.csv"

Arthur:
  Gene:
    2: 0.0329
    1: 0.1035
    0: 0.8636
  Trait:
    True: 0.0000
    False: 1.0000
Charlie:
  Gene:
    2: 0.0018
    1: 0.1331
    0: 0.8651
  Trait:
    True: 0.0000
    False: 1.0000
Fred:
  Gene:
    2: 0.0065
    1: 0.6486
    0: 0.3449
  Trait:
    True: 1.0000
    False: 0.0000
Ginny:
  Gene:
    2: 0.0027
    1: 0.1805
    0: 0.8168
  Trait:
    True: 0.1110
    False: 0.8890
Molly:
  Gene:
    2: 0.0329
    1: 0.1035
    0: 0.8636
  Trait:
    True: 0.0000
    False: 1.0000
Ron:
  Gene:
    2: 0.0027
    1: 0.1805
    0: 0.8168
  Trait:
    True: 0.1110
    False: 0.8890


In [64]:
%run heredity.py "data/family2.csv"

Arthur:
  Gene:
    2: 0.0147
    1: 0.0344
    0: 0.9509
  Trait:
    True: 0.0000
    False: 1.0000
Hermione:
  Gene:
    2: 0.0608
    1: 0.1203
    0: 0.8189
  Trait:
    True: 0.0000
    False: 1.0000
Molly:
  Gene:
    2: 0.0404
    1: 0.0744
    0: 0.8852
  Trait:
    True: 0.0768
    False: 0.9232
Ron:
  Gene:
    2: 0.0043
    1: 0.2149
    0: 0.7808
  Trait:
    True: 0.0000
    False: 1.0000
Rose:
  Gene:
    2: 0.0088
    1: 0.7022
    0: 0.2890
  Trait:
    True: 1.0000
    False: 0.0000


In [60]:
# Script entry point for the exported heredity.py.
# NOTE(review): the output recorded for this cell is the usage SystemExit, i.e.
# main() found len(sys.argv) != 2 when executed inside the notebook. Use the
# %run cells, which pass a CSV path, to see real results.
if __name__ == "__main__":
    main()

SystemExit: Usage: python heredity.py data.csv

In [55]:
# Scratch: show the whole PROBS table and how its nested entries are indexed.
print(PROBS)
print(PROBS["gene"][1])   # probability of one copy
print(PROBS["trait"][1])  # trait distribution given one copy

%cd "C:/Users/wangj337/Google Drive/Courses/IntroAI-CS50/2Uncertainty/heredity/data"
# Load the sample family; `people` becomes global kernel state that the scratch
# cells in this notebook read. The relative path depends on the working
# directory set by the %cd line in this cell.
people = load_data("family0.csv")

{'gene': {2: 0.01, 1: 0.03, 0: 0.96}, 'trait': {2: {True: 0.65, False: 0.35}, 1: {True: 0.56, False: 0.44}, 0: {True: 0.01, False: 0.99}}, 'mutation': 0.01}
0.03
{True: 0.56, False: 0.44}
C:\Users\wangj337\Google Drive\Courses\IntroAI-CS50\2Uncertainty\heredity\data


In [19]:
# Scratch: inspect the loaded data, the set of names, and the subsets that
# powerset() produces for it. Requires the global `people` from the
# load_data("family0.csv") cell.
names = set(people)

print(people)
print(names)
print(powerset(names))
# Bare last expression: displayed as the cell's output
people['Harry']

{'Harry': {'name': 'Harry', 'mother': 'Lily', 'father': 'James', 'trait': None}, 'James': {'name': 'James', 'mother': None, 'father': None, 'trait': True}, 'Lily': {'name': 'Lily', 'mother': None, 'father': None, 'trait': False}}
{'Harry', 'Lily', 'James'}
[set(), {'Harry'}, {'Lily'}, {'James'}, {'Harry', 'Lily'}, {'Harry', 'James'}, {'James', 'Lily'}, {'Harry', 'Lily', 'James'}]


{'name': 'Harry', 'mother': 'Lily', 'father': 'James', 'trait': None}

In [40]:
# Scratch copy of the accumulator that main() builds: one zeroed "gene" and
# "trait" table per person. Requires the global `people`.
probabilities = {
    person: {
        "gene": {
            2: 0,
            1: 0,
            0: 0
        },
        "trait": {
            True: 0,
            False: 0
        }
    }
    for person in people
}
print(probabilities)
# set() over a dict yields its keys, i.e. the person names
print(set(probabilities))

{'Harry': {'gene': {2: 0, 1: 0, 0: 0}, 'trait': {True: 0, False: 0}}, 'James': {'gene': {2: 0, 1: 0, 0: 0}, 'trait': {True: 0, False: 0}}, 'Lily': {'gene': {2: 0, 1: 0, 0: 0}, 'trait': {True: 0, False: 0}}}
{'Harry', 'Lily', 'James'}


In [27]:
# Scratch: try out the evidence check from main() on one candidate subset.
# Requires the globals `names` and `people` from other cells.
# NOTE(review): the subset found at a given index depends on the iteration
# order of the `names` set, so it may differ between kernel sessions.
have_trait = powerset(names)[6]
print(have_trait)
person = 'Lily'
# Recorded trait vs. membership in the candidate set; a mismatch is what
# main() treats as failing the evidence
print(people[person]["trait"])
print(person in have_trait)
# Last subset returned by powerset(): the full set of names
one_gene = powerset(names)[7]
print(one_gene)

{'James', 'Lily'}
False
True
{'Harry', 'Lily', 'James'}


In [38]:
# Scratch: list the people in `one_gene` who have both parents recorded.
# Requires the globals `one_gene` and `people` from other cells.
for person in one_gene:
    #print(people[person])
    if people[person]['mother'] and people[person]['father']:
        print(person)

Harry


In [39]:
# Scratch: probability of showing the trait with zero copies of the gene.
PROBS["trait"][0][True]

0.01

In [41]:
# Scratch: confirm that `/=` divides a value by a total in place, the same
# operation normalize() applies to each entry.
a = 0.3
test_sum = 0.5
a /= test_sum
print(a)


0.6


In [56]:
%cd "C:/Users/wangj337/Google Drive/Courses/IntroAI-CS50/2Uncertainty/heredity"
!jupyter nbconvert --to script "scratch.ipynb" --output heredity

C:\Users\wangj337\Google Drive\Courses\IntroAI-CS50\2Uncertainty\heredity


[NbConvertApp] Converting notebook scratch.ipynb to script
[NbConvertApp] Writing 10768 bytes to heredity.py
