# Probability of a nation winning based on International Skating Union records


In [77]:
class ProbDist(dict):
    """A Probability Distribution; an {outcome: probability} mapping."""
    def __init__(self, mapping=(), **kwargs):
        self.update(mapping, **kwargs)
        # Make probabilities sum to 1.0; assert no negative probabilities
        total = sum(self.values())
        for outcome in self:
            self[outcome] = self[outcome] / total
            assert self[outcome] >= 0
            
            
            
def p(event, space): 
    """The probability of an event, given a sample space of equiprobable outcomes. 
    event: a collection of outcomes, or a predicate that is true of outcomes in the event. 
    space: a set of outcomes or a probability distribution of {outcome: frequency} pairs."""
    # branch on the type of the first argument
    if callable(event):
        # transform the mapping (untangible) 'event' into the collection (tangible) 'event'
        event = such_that(event, space)
        
    if isinstance(space, ProbDist):
        # if space is a dictionary of distinct probabilities, where each item does not count as the same amount
        # we need to be careful and count each amount according to what it's worth
        return sum([space[o] for o in event])
    else:
        # space is not a dictionary but a collection, let's fall back to our original division
        return Fraction(len(event & space), len(space))

def such_that(predicate, space): 
    """The outcomes in the sample pace for which the predicate is true.
    If space is a set, return a subset {outcome,...} with outcomes where predicate(element) is true;
    if space is a ProbDist, return a ProbDist {outcome: frequency,...} with outcomes where predicate(element) is true."""
    if isinstance(space, ProbDist):
        return ProbDist({o:space[o] for o in space if predicate(o)})
    else:
        return {o for o in space if predicate(o)}

 First we are finding top two Men skaters  for all the countries
 
 Nathan Chen (US)
 Vincet Zhou (US)
 
 Yuma Kagiyama(JP)
 Shorma Uno(JP)
 
 Mikhail Kolyada(RU)
 Mark Kondratiuk(RU)
 
 Daniel Grassl (IT)
 Matteto Rizzo (IT)
 
 Keegan Messing (CA)
 Roman Sadovsky (CA)

 Boyang Jin(CN)
 Yudong Chen (CN)

Now we are finding ProbDist for each one of these skaters for the men

In [78]:
MFS=ProbDist(
    USNCM=303.05,
    USVZM=282.18,
    RUMKM=265.06,
    RUMKOM=255.98,
    JPYKM=288.06,
    JPSUM=284.61,
    CAKMM=250.93,
    CARSM=218.20,
    CNBJM=243.68,
    CNYCM=174.66,
    ITDGM=255.62,
    ITMRM=245.40
)
print(MFS)

{'USNCM': 0.09879606054579894, 'USVZM': 0.09199231930312998, 'RUMKM': 0.08641109984579926, 'RUMKOM': 0.08345096709623366, 'JPYKM': 0.0939092334625403, 'JPSUM': 0.09278451342002915, 'CAKMM': 0.08180463775864487, 'CARSM': 0.07113446761621292, 'CNBJM': 0.07944109564032431, 'CNYCM': 0.05694017467391269, 'ITDGM': 0.08333360500484119, 'ITMRM': 0.08000182563253277}


we are finding top two Women  skaters  for all the countries

Kamila Valieva	(RU)
Eliza Tuktamysheva	(RU)

Kaori Sakamoto	(JP)
Mai Mihara(JP)

Alysa Liu (US)
Isabeau Levito (US)

Madeline Schizas (CA)
Kaiya Ruiter (CA)

Shan Ashley Lin	(CN)
Yi Zhu (CN)

Lara Naki Gutmann(IT)
Lucrezia Beccari(IT)

In [79]:
WFS=ProbDist(
    USALW=208.36,
    USILW=205.33,
    RUKVW=263.04,
    RUETW=231.80,
    JPKSW=213.85,
    JPMMW=211.64,
    CAMSW=187.68,
    CAKRW=178.76,
    CNSLW=157.85,
    CNYZW=152.36,
    ITLGW=163.54,
    ITLBW=152.87
)
print(WFS)

{'USALW': 0.0895371022912835, 'USILW': 0.08823504133936093, 'RUKVW': 0.11303436065799201, 'RUETW': 0.09960981143750967, 'JPKSW': 0.09189628203585609, 'JPMMW': 0.09094659401481685, 'CAMSW': 0.08065042886364028, 'CAKRW': 0.07681729893256785, 'CNSLW': 0.06783178919504271, 'CNYZW': 0.06547260945047012, 'ITLGW': 0.07027691355690394, 'ITLBW': 0.0656917682245561}


### Part 1:What is the probability for the US to win both the men's and the women's event?

In [80]:
def joint(A, B, sep=' '):
    """The joint distribution of two independent probability distributions. 
    Result is all entries of the form {a+sep+b: P(a)*P(b)}"""
    return ProbDist({a+sep+b: A[a] * B[b]
                    for a in A
                    for b in B})

JPD= joint(MFS, WFS, ' ')
JPD

{'USNCM USALW': 0.008845912979065038,
 'USNCM USILW': 0.008717274486424575,
 'USNCM RUKVW': 0.011167349539322653,
 'USNCM RUETW': 0.009841056961735822,
 'USNCM JPKSW': 0.009078990643948254,
 'USNCM JPMMW': 0.008985165208722041,
 'USNCM CAMSW': 0.007967944653056856,
 'USNCM CAKRW': 0.007589246516306709,
 'USNCM CNSLW': 0.006701513552243309,
 'USNCM CNYZW': 0.006468435887360094,
 'USNCM ITLGW': 0.00694308220673976,
 'USNCM ITLBW': 0.006490087910873835,
 'USVZM USALW': 0.008236725703456764,
 'USVZM USILW': 0.008116946096615365,
 'USVZM RUKVW': 0.010398292997875154,
 'USVZM RUETW': 0.00916333757948396,
 'USVZM JPKSW': 0.008453752119812962,
 'USVZM JPMMW': 0.008366388116143162,
 'USVZM CAMSW': 0.007419220003958367,
 'USVZM CAKRW': 0.0070666014914087675,
 'USVZM CNSLW': 0.0062400036105329714,
 'USVZM CNYZW': 0.006022977194176773,
 'USVZM ITLGW': 0.0064649362715651715,
 'USVZM ITLBW': 0.006043138118100573,
 'RUMKM USALW': 0.00773699948599564,
 'RUMKM USILW': 0.007624486967073742,
 'RUMKM RUKV

In [81]:
def US_MEN_AND_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('US') and out2.startswith('US')

p(US_MEN_AND_WOMEN,JPD)

0.03391685926556175

### What is the probability for the US to win at least one of these two events?


In [82]:
def US_MEN_OR_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('US') or out2.startswith('US')   
p(US_MEN_OR_WOMEN,JPD)

0.33464366421401165

### Question 2: If the US wins the men's event, what is the probability that the US wins the women's event? If the US wins at least one of these two events, what is the probability the US wins both events (not necessarily the first one)? How about Canada, China, and the ROC? Which nations are more likely to win both events if they win one of them?

### For USA

Lets first define few predicates

In [83]:
def US_MEN_WIN(outcome):
    out1,out2=outcome.split(' ')
    return (out1.startswith('US') and out1.endswith('M')) ##or (out2.startswith('US') and out2.endsswith('M') )
  
p(US_MEN_WIN,JPD)

0.19078837984892894

In [84]:
def US_WOMEN_WIN(outcome):
    out1,out2=outcome.split(' ')
    return (out2.startswith('US') and out2.endswith('W')) 
p(US_WOMEN_WIN,JPD)

0.1777721436306444

### If the US wins the men's event,the probability that the US wins the women's event is:


In [85]:
p(US_WOMEN_WIN,such_that(US_MEN_WIN,JPD))

0.17777214363064442

### If the US wins at least one of these two events, then the probability that  US wins both events:

In [86]:
US=p(US_MEN_AND_WOMEN,such_that(US_MEN_OR_WOMEN,JPD))
US

0.1013521631889352

### For CHINA

Lets first define few predicates

In [87]:
def CN_MEN_AND_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('CN') and out2.startswith('CN')

p(CN_MEN_AND_WOMEN,JPD)

0.018180223225750494

In [88]:
def CN_MEN_OR_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('CN') or out2.startswith('CN')   
p(CN_MEN_OR_WOMEN,JPD)

0.25150544573399936

### If CHINA wins at least one of these two events, then the probability that CHINA wins both events is:

In [89]:
CN=p(CN_MEN_AND_WOMEN,such_that(CN_MEN_OR_WOMEN,JPD))
CN

0.0722856046822084

### For CANADA

Lets define few predicates

In [90]:
def CA_MEN_AND_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('CA') and out2.startswith('CA')

p(CA_MEN_AND_WOMEN,JPD)

0.0240829734145637

In [91]:
def CA_MEN_OR_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('CA') or out2.startswith('CA')   
p(CN_MEN_OR_WOMEN,JPD)

0.25150544573399936

 ### If CANADA wins at least one of these two events, then the probability that CANADA wins both events is:

In [92]:
CA=p(CA_MEN_AND_WOMEN,such_that(CA_MEN_OR_WOMEN,JPD))
CA

0.08411095545807648

### For JAPAN

 Let define few predicates

In [93]:
def JP_MEN_AND_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('JP') and out2.startswith('JP')

p(JP_MEN_AND_WOMEN,JPD)

0.034135621620685355

In [94]:
def JP_MEN_OR_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('JP') or out2.startswith('JP')   
p(JP_MEN_OR_WOMEN,JPD)

0.335401001312557

#### If JAPAN wins at least one of these two events, then the probability that JAPAN wins both events is:

In [95]:
JP=p(JP_MEN_AND_WOMEN,such_that(JP_MEN_OR_WOMEN,JPD))
JP

0.10177555072018013

### For ROC

Lets define few predicates

In [96]:
def RU_MEN_AND_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('RU') and out2.startswith('RU')

p(RU_MEN_AND_WOMEN,JPD)

0.036120178595319276

In [97]:
def RU_MEN_OR_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('RU') or out2.startswith('RU')   
p(RU_MEN_OR_WOMEN,JPD)

0.3463860604422153

### If ROC wins at least one of these two events, then the probability that ROC wins both events is:

In [98]:
ROC=p(RU_MEN_AND_WOMEN,such_that(RU_MEN_OR_WOMEN,JPD))
ROC

0.10427722913908916

### For ITALY

Lets define few predicates:

In [99]:
def IT_MEN_AND_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('IT') and out2.startswith('IT')

p(IT_MEN_AND_WOMEN,JPD)

0.022208503191970836

In [100]:
def IT_MEN_OR_WOMEN(outcome):
    out1,out2=outcome.split(' ')
    return out1.startswith('IT') or out2.startswith('IT')   
p(IT_MEN_OR_WOMEN,JPD)

0.27709560922686316

### If ITALY wins at least one of these two events, then the probability that ITALY wins both events is:

In [101]:
IT=p(IT_MEN_AND_WOMEN,such_that(IT_MEN_OR_WOMEN,JPD))
IT

0.08014743811327713

###  Nations which are more likely to win both events if they win one of them are:


The nations which are more likely to win both events if they win one of them are obtainer by finding the probailities as show above.Based on the above calculations we can conclude that ROC(0.104) i.e. 10.4%  has the highest probability of winning both followed by JP and then US,CN,IT,CN.


### Question 3 :The US wins at least one of these two events the same day that the US team of the same sex wins the hockey event gold medal. What is the probability the US wins both events assuming that the teams in contention to win the gold medal for the hockey event for both men and women are USA, China, Russia, Canada, and Japan and that men and women teams have the same probability to win hockey gold?

First we are fining the probability ditribution of men hockey team winning.


In [102]:
HMFS=ProbDist(
    USHM=100,
    
    RUHM=100,
    
    
    JPHM=100,
    
    CAHM=100,
    
    CNHM=100,
    
  )
print(HMFS)

{'USHM': 0.2, 'RUHM': 0.2, 'JPHM': 0.2, 'CAHM': 0.2, 'CNHM': 0.2}


Now,we are fining the probability ditribution of women hockey team winning.

In [103]:
HWFS=ProbDist(
    USHW=100,
    
    RUHW=100,
    
    
    JPHW=100,
    
    CAHW=100,
    
    CNHW=100,
    
  )
print(HWFS)

{'USHW': 0.2, 'RUHW': 0.2, 'JPHW': 0.2, 'CAHW': 0.2, 'CNHW': 0.2}


From the above cells we can see that Uniform probability is prevalent for all the countries mentioned,when it comes to  winning the hockey event.

Lets find the joint distribution of us men and women participating in skating and hockey.

In [104]:
def joint(A, B,C,D, sep=' '):
    """The joint distribution of two independent probability distributions. 
    Result is all entries of the form {a+sep+b+sep+c+sep+d: P(a)*P(b)*p(c)*p(d)}"""
    return ProbDist({a+sep+b+sep+c+sep+d: A[a] * B[b] * C[c] * D[d]
                    for a in A
                    for b in B
                    for c in C
                    for d in D})

MJPD= joint(MFS,HMFS,WFS,HWFS, ' ')
MJPD


{'USNCM USHM USALW USHW': 0.0003538365191626012,
 'USNCM USHM USALW RUHW': 0.0003538365191626012,
 'USNCM USHM USALW JPHW': 0.0003538365191626012,
 'USNCM USHM USALW CAHW': 0.0003538365191626012,
 'USNCM USHM USALW CNHW': 0.0003538365191626012,
 'USNCM USHM USILW USHW': 0.00034869097945698267,
 'USNCM USHM USILW RUHW': 0.00034869097945698267,
 'USNCM USHM USILW JPHW': 0.00034869097945698267,
 'USNCM USHM USILW CAHW': 0.00034869097945698267,
 'USNCM USHM USILW CNHW': 0.00034869097945698267,
 'USNCM USHM RUKVW USHW': 0.00044669398157290565,
 'USNCM USHM RUKVW RUHW': 0.00044669398157290565,
 'USNCM USHM RUKVW JPHW': 0.00044669398157290565,
 'USNCM USHM RUKVW CAHW': 0.00044669398157290565,
 'USNCM USHM RUKVW CNHW': 0.00044669398157290565,
 'USNCM USHM RUETW USHW': 0.00039364227846943245,
 'USNCM USHM RUETW RUHW': 0.00039364227846943245,
 'USNCM USHM RUETW JPHW': 0.00039364227846943245,
 'USNCM USHM RUETW CAHW': 0.00039364227846943245,
 'USNCM USHM RUETW CNHW': 0.00039364227846943245,
 'USN

Lets define few predicates which lets us find the probability of either of the sexes winning below.

In [105]:
def US_MENWIN_OR_WOMENWIN(outcome):
    out1, out2, out3, out4 = outcome.split(' ')
    condition  = ((out1.startswith('US') and out2.startswith('US')) or
                 (out3.startswith('US') and out4.startswith('US'))                 
                 )
    return condition 
p(US_MENWIN_OR_WOMENWIN,MJPD)

0.07235543032529203

In [106]:
def US_MEN_AND_WOMEN_SKATING(outcome):
    out1, out2, out3, out4 = outcome.split(' ')
    condition = out1.startswith('US') and out3.startswith('US')
    return condition

So here is the result i.e. the probability that US wins both events assuming that the teams in contention to win the gold medal for the hockey event for both men and women are USA, China, Russia, Canada, and Japan and that men and women teams have the same probability to win hockey gold.

In [107]:
p(US_MEN_AND_WOMEN_SKATING,such_that(US_MENWIN_OR_WOMENWIN,MJPD))

0.16875125033060803