<a href="https://colab.research.google.com/github/Keerthana-Ravichandran/Deploying-Winning-Voter-Confidence-paper-as-Streamlit-app-in-colab-using-ngrok/blob/main/deploy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install streamlit -q


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for validators (setup.py) ... [?25l[?25hdone


In [2]:
!pip install streamlit_option_menu


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting streamlit_option_menu
  Downloading streamlit_option_menu-0.3.2-py3-none-any.whl (712 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m712.3/712.3 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: streamlit_option_menu
Successfully installed streamlit_option_menu-0.3.2


app.py

In [3]:
%%writefile app.py
import streamlit as st
from streamlit_option_menu import option_menu
import numpy as np
import pandas as pd
import altair as alt
import random
import math
import scipy.stats as stats
import matplotlib.pyplot as plt
from scipy.stats import hypergeom




# Set page title
st.set_page_config(page_title="Fixing India’s Faulty VVPAT-based Audit of EVMs", page_icon=":india:")


# Set page subtitle



with st.sidebar:
    selected = option_menu(' Winning Voter Confidence',
                          
                          ["INTRODUCTION", "OBJECTIVE OF THIS POLICY PAPER","II.ODDITIES OF STATISTICAL SAMPLING", "III.HYPERGEOMETRIC DISTRIBUTION MODEL: AN EXACT FIT FOR EVM SAMPLING", "IV.EVMs OF AN ASSEMBLY CONSTITUENCY AS 'POPULATION'","V.EVMs OF A PARLIAMENTARY  CONSTITUENCY AS 'POPULATION'", "VI.EVMs USED IN A STATE AS A WHOLE AS ‘POPULATION’"],
                          default_index=0)
   

# Show selected section
if selected == "INTRODUCTION":
    st.title("POLICY WATCH NO - 7")
    st.write("# Winning Voter Confidence: Fixing India’s Faulty VVPAT-based Audit of EVMs")
    st.subheader("A paper by K. Ashok Vardhan Shetty")
elif selected == "OBJECTIVE OF THIS POLICY PAPER":
    st.subheader("Policy Watch No. 7 Winning Voter Confidence: Fixing India's Faulty VVPAT-based Audit of EVMs:")
    
    
    
    st.write("It is a research paper authored by K. Ashok Vardhan Shetty. It focuses on India's electronic voting machines (EVMs) and the Voter Verifiable Paper Audit Trail (VVPAT) system used to ensure transparency and accuracy in the voting process.")



    st.write("The paper analyzes the current VVPAT-based audit system and identifies several shortcomings in its design and implementation. It suggests specific changes to improve the accuracy and reliability of the system, such as increasing the sample size of the VVPAT slips that are audited and improving the training of polling officials.")



    st.write("The goal of the paper is to promote discussion and awareness of these issues in order to improve voter confidence in the integrity of India's electoral process.")


elif selected == "II.ODDITIES OF STATISTICAL SAMPLING":
    st.subheader("ODDITIES OF STATISTICAL SAMPLING")
    st.write("The article discusses the sample size required to detect defective Electronic Voting Machines (EVMs) in an election. The author assumes that one percent of the EVMs used in an election are defective and defines a defective EVM as one that has a mismatch between the EVM count and the VVPAT count. The author uses the Hypergeometric Distribution to calculate the sample size required for 99% probability of detecting at least one defective EVM, for various population sizes of EVMs.")
    st.write("The article shows that the sample size required increases as the population size of EVMs increases, but the sampling fraction (sample size relative to the population size) decreases rapidly. The article notes that the sample size hits a plateau and remains constant even for large population sizes of EVMs, such as one crore. Therefore, for big populations, the population size is irrelevant to sample size.")

    pop_size = np.arange(1, 11)

# Define the corresponding sample size range (as a percentage of population size)
    sample_size = np.round(np.sqrt(pop_size) * 100)

# Create a DataFrame to store the data
    data = pd.DataFrame({'Population Size (Millions)': pop_size, 'Sample Size': sample_size})

# Create a bar chart using Altair
    chart = alt.Chart(data).mark_bar().encode(x='Population Size (Millions)',y='Sample Size',tooltip=['Population Size (Millions)', 
    'Sample Size']).properties(title='Sample Size vs Population Size')
    st.altair_chart(chart, use_container_width=True)
    
    
    
    
    
    
elif selected == "III.HYPERGEOMETRIC DISTRIBUTION MODEL: AN EXACT FIT FOR EVM SAMPLING":
    st.subheader("AN EXACT FIT FOR EVM SAMPLING:")
    st.write("The author proposes a new statistical model, the hypergeometric distribution model, which can provide an exact fit for the VVPAT-based audit process. The author argues that this model can overcome some of the limitations of current statistical sampling methods and provide more accurate and reliable results, thus improving voter confidence in the electoral process.")
    
# Define the number of total votes and the margin of victory
    num_votes = 100000
    margin = 5000

# Define the size of the sample to be audited
    sample_size = 1000

# Calculate the number of votes required to win
    winning_votes = (num_votes // 2) + margin

# Simulate the election results
    results = [0] * (num_votes - winning_votes) + [1] * winning_votes
    random.shuffle(results)

# Simulate the VVPAT-based audit process using the hypergeometric distribution
    audit_results = []
    for i in range(100):
        sample = random.sample(results, sample_size)
        num_winning_votes = sum(sample)
        pvalue = 1 - hypergeom.cdf(num_winning_votes, len(results), sum(results), sample_size)
        audit_results.append({"sample": i + 1, "winning_votes": num_winning_votes, "p-value": pvalue})

# Convert the audit results to a pandas DataFrame
    df = pd.DataFrame(audit_results)

# Create an Altair chart to visualize the p-values
    chart = alt.Chart(df).mark_bar().encode(
    x=alt.X("sample:O", title="Sample"),
    y=alt.Y("p-value:Q", title="p-value")).properties(
    title="VVPAT-based Audit Results")
    st.altair_chart(chart)





elif selected == "IV.EVMs OF AN ASSEMBLY CONSTITUENCY AS 'POPULATION'":
    st.subheader("EVMs OF AN ASSEMBLY CONSTITUENCY AS 'POPULATION'")
    st.write("The author argues that using the EVMs of an Assembly Constituency as the 'population' for conducting VVPAT-based audits is not a feasible option. The sample sizes required to conduct these audits are almost as large as the population sizes, which leaves little or no scope for statistical sampling. This negates the advantage of having a smaller number of EVMs to count in the event of a defective EVM turning up in the sample. Additionally, the workload involved in counting VVPAT slips from such large sample sizes is enormous. Therefore, the author suggests that using paper ballots and counting them 100% may be a more viable option than using EVMs and conducting VVPAT-based audits with such large sample sizes.")


# Define the data for the table
    N = np.array([50, 100, 200, 300])
    k = np.array([1, 1, 2, 3])
    P = 0.01
    n = np.ceil(np.log(1 - 0.95) / np.log(1 - P**k))

# Set up the bar chart
    fig, ax = plt.subplots()
    ax.bar(N, n, width=10)

# Set the plot title and axis labels
    ax.set_title('Sample Sizes for Assembly Constituencies')
    ax.set_xlabel('Population Size (Number of EVMs)')
    ax.set_ylabel('Sample Size')

# Display the plot using Streamlit
    st.pyplot(fig)
    st.write("so, EVMs used in an Assembly Constituency are not an appropriate choice for ‘population’")

elif selected == "V.EVMs OF A PARLIAMENTARY  CONSTITUENCY AS 'POPULATION'":
    st.subheader("EVMs of a Parliamentary Constituency are the Population")
    st.write(" The author first considers using the EVMs of a Parliamentary Constituency as the 'population' for statistical sampling, but notes that the resulting sample sizes required for statistical sampling are too big relative to the population size, making the workload of counting VVPAT slips enormous. Additionally,if a defective EVM is found in the chosen sample, the number of remaining EVMs in the population that need to be counted is still large, resulting in a considerable workload.")

    st.write("The author then notes that using the ECI-prescribed sample size of 'one EVM per Assembly Constituency' is also seriously wrong, with a high probability (94.1%) of failing to detect a defective EVM. ")
    
    def calc_sample_size(n, N, p):
        q = 1 - p
        k = math.ceil(N / n)
        return math.ceil(n * ((k-1) * p + 1) * ((k-1) * q + 1))

    def simulate_parliamentary_constituency(N_list, P=0.01):
        df_list = []
        for N in N_list:
            n = 6  # ECI-prescribed sample size
            defective_evms = math.ceil(N * P)
            sample_size = calc_sample_size(n, N, P)
            pct_n_to_N = round((n / N) * 100, 2)
            prob_fail = round(((defective_evms / N) ** n) * 100, 1)
            df_list.append(pd.DataFrame({
            'Population Size (N)': [N],
            'Number of Defective EVMs in the Population': [defective_evms],
            'Sample Size (n) Required': [sample_size],
            '% of n to N': [pct_n_to_N],
            'Probability of Failing to Detect a Defective EVM (%)': [prob_fail]
        }))
        return pd.concat(df_list, ignore_index=True)

    N_list = [300, 600, 1200, 1800]
    df_table_5 = simulate_parliamentary_constituency(N_list)

# Create a Streamlit table to display the results
    st.write('Table 5 - Sample Sizes if EVMs of a Parliamentary Constituency are the Population')
    st.table(df_table_5)

# Create a Streamlit chart to display the results
    chart_table_5 = alt.Chart(df_table_5).mark_bar().encode(
    x='Population Size (N):Q',
    y='Sample Size (n) Required:Q',
    color=alt.Color('% of n to N:O', scale=alt.Scale(scheme='reds')),
    tooltip=['Population Size (N)', 'Number of Defective EVMs in the Population',
             'Sample Size (n) Required', '% of n to N',
             'Probability of Failing to Detect a Defective EVM (%)']
).properties(title='Sample Sizes if EVMs of a Parliamentary Constituency are the Population',
             width=500, height=300)

    st.write('Chart 5 - Sample Sizes if EVMs of a Parliamentary Constituency are the Population')
    st.altair_chart(chart_table_5)

elif selected == "VI.EVMs USED IN A STATE AS A WHOLE AS ‘POPULATION’":
    st.subheader("EVMs of a State as a Whole:")
    st.write("The author might argue that the right population for sampling is the state rather than other levels of constituency because the state represents a homogeneous group of individuals with similar characteristics, such as culture, language, and lifestyle. This makes it easier to obtain a representative sample that accurately reflects the population's characteristics.")

    st.write("The author justifies this argument by stating that using EVMs from an assembly constituency or parliamentary constituency would result in a sample size that is nearly as big as the respective population size, leaving little or no scope for statistical sampling. Using EVMs from the entire country as a population for sampling would be too large and administratively infeasible, as counting the VVPAT slips of all remaining EVMs in the population would be too time-consuming.")

    # Define the data for the chart
    data = pd.DataFrame({
    'State': ['Mizoram', 'Chhattisgarh', 'Telangana', 'Rajasthan', 'Madhya Pradesh'],
    'Population Size': [1164, 23672, 32574, 51796, 65341],
    'Sample Size Required': [370, 455, 455, 457, 457],
    'Sampling Fraction': [31.79, 1.92, 1.40, 0.88, 0.70],
    'Average EVMs per AC': [10, 5, 4, 2, 2],
    'Failure Probability': [0.656, 0.403, 0.301, 0.133, 0.099]
})

# Define the chart
    chart = alt.Chart(data).mark_bar().encode(
    x=alt.X('State', sort=None),
    y='Sample Size Required',
    color=alt.Color('State', legend=None),
    tooltip=['Population Size', 'Sample Size Required', 'Sampling Fraction', 'Average EVMs per AC', 'Failure Probability']
).properties(
    title='Sample Sizes for EVMs of a State as a Whole'
)
    st.altair_chart(chart)
    
    st.write("The author suggests that using EVMs from a state as a whole for population sampling would lead to a minimal workload involved in the hand counting of VVPAT slips for the chosen sample size (n).")







Writing app.py


In [16]:
!pip install streamlit pyngrok


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
 ! pip install pyngrok

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyngrok
  Downloading pyngrok-6.0.0.tar.gz (681 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m681.2/681.2 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyngrok
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Created wheel for pyngrok: filename=pyngrok-6.0.0-py3-none-any.whl size=19879 sha256=4ff2f18db998e1068601646a99acb040239ec3c8b34a132bffb86f2ad0dde55d
  Stored in directory: /root/.cache/pip/wheels/5c/42/78/0c3d438d7f5730451a25f7ac6cbf4391759d22a67576ed7c2c
Successfully built pyngrok
Installing collected packages: pyngrok
Successfully installed pyngrok-6.0.0


In [6]:
from pyngrok import ngrok

In [37]:
from pyngrok import ngrok

url = ngrok.connect(port='8501', proto='http', options={"bind_tls": True})
print(url)




PyngrokNgrokHTTPError: ignored

In [33]:
!pip install pyngrok

from pyngrok import ngrok
url = ngrok.connect(addr="8501", proto="http")

print(url)

!nohup streamlit run app.py &



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
NgrokTunnel: "https://b966-34-68-147-218.ngrok-free.app" -> "http://localhost:8501"
nohup: appending output to 'nohup.out'


In [None]:
!ngrok start --config ngrok.yml test-tunnel


In [7]:
ngrok.set_auth_token("2PHOveUbnsTwNbtWBC3rROmUDzv_7xQbMCzmYzUgCzuCkU7tG") 



In [40]:
from pyngrok import ngrok

tunnels = ngrok.get_tunnels()
tunnels

[<NgrokTunnel: "https://ae1c-34-68-147-218.ngrok-free.app" -> "http://localhost:8501">,
 <NgrokTunnel: "https://b966-34-68-147-218.ngrok-free.app" -> "http://localhost:8501">]