<a href="https://colab.research.google.com/github/Shreya-singh01/PolySync/blob/main/POLYSYNC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **ROLE OF THIS AI:**
The AI model will allow citizens to submit inputs, like feedback or suggestions about healthcare policies. It will analyze these inputs, summarize the pros and cons, predict the potential impacts of policies, and allow citizens to vote on them. This will give policymakers a direct view of public sentiment and engagement.

**CREATING THE CUSTOM DATASET**


In [None]:
import pandas as pd
import random

def generate_dummy_healthcare_dataset(num_samples=1000, seed=None):
    """Build a synthetic DataFrame of healthcare policy descriptions.

    Every row is assembled from randomly chosen phrase fragments around one
    of ten fixed policy topics and paired with an independently drawn
    outcome. The outcome is not derived from the text in any way.

    Args:
        num_samples: Number of rows to generate.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            drives all choices, so the same seed always produces the same
            frame. When ``None`` (the default) the module-level ``random``
            state is used, exactly as before.

    Returns:
        pandas.DataFrame with the columns ``policy_id`` ("POL_0000", ...),
        ``policy_text``, ``category`` (the chosen topic) and
        ``outcome_label`` (1 = Positive, 0 = Negative, 0.5 = Neutral).
        The columns are present even when ``num_samples`` is 0.
    """
    policy_topics = [
        "Universal healthcare coverage",
        "Telemedicine expansion",
        "Mental health services",
        "Prescription drug pricing",
        "Rural healthcare access",
        "Preventive care programs",
        "Medical research funding",
        "Healthcare workforce training",
        "Patient data privacy",
        "Emergency medical services"
    ]

    outcomes = ["Positive", "Negative", "Neutral"]
    # Numeric encoding of each outcome name.
    outcome_scores = {"Positive": 1, "Negative": 0, "Neutral": 0.5}
    columns = ["policy_id", "policy_text", "category", "outcome_label"]

    # The module itself exposes choice(), so it can stand in for an instance
    # when no seed was requested.
    rng = random if seed is None else random.Random(seed)

    data = []
    for index in range(num_samples):
        # Draw order (topic, outcome, then the three phrase fragments) is
        # kept fixed so seeded runs stay stable.
        topic = rng.choice(policy_topics)
        outcome = rng.choice(outcomes)

        policy_text = f"This policy aims to {rng.choice(['improve', 'enhance', 'expand', 'reform'])} {topic}. "
        policy_text += f"It will {rng.choice(['increase funding for', 'create new programs for', 'modify existing regulations on'])} {topic}. "
        policy_text += f"The goal is to {rng.choice(['reduce costs', 'improve access', 'enhance quality', 'increase efficiency'])} in healthcare."

        data.append({
            "policy_id": f"POL_{index:04d}",
            "policy_text": policy_text,
            "category": topic,
            "outcome_label": outcome_scores[outcome]
        })

    # Passing columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(data, columns=columns)

# Generate the synthetic dataset and persist it next to the notebook.
df = generate_dummy_healthcare_dataset()
df.to_csv('healthcare_policy_dummy_dataset.csv', index=False)

# Quick sanity check: first rows, then shape, columns and one full text.
print(df.head())
for summary_line in (
    f"\nDataset shape: {df.shape}",
    f"\nColumn names: {df.columns}",
    f"\nSample policy text:\n{df['policy_text'].iloc[0]}",
):
    print(summary_line)

  policy_id                                        policy_text  \
0  POL_0000  This policy aims to improve Patient data priva...   
1  POL_0001  This policy aims to expand Mental health servi...   
2  POL_0002  This policy aims to expand Medical research fu...   
3  POL_0003  This policy aims to expand Universal healthcar...   
4  POL_0004  This policy aims to expand Universal healthcar...   

                        category  outcome_label  
0           Patient data privacy            0.5  
1         Mental health services            1.0  
2       Medical research funding            0.0  
3  Universal healthcare coverage            0.0  
4  Universal healthcare coverage            1.0  

Dataset shape: (1000, 4)

Column names: Index(['policy_id', 'policy_text', 'category', 'outcome_label'], dtype='object')

Sample policy text:
This policy aims to improve Patient data privacy. It will increase funding for Patient data privacy. The goal is to increase efficiency in healthcare.


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import re

In [None]:

# Load the CSV written by the dataset-generation cell. The path is relative,
# matching the to_csv call, so it resolves against the working directory
# instead of only working under Colab's /content folder.
data = pd.read_csv('healthcare_policy_dummy_dataset.csv')

In [None]:
# Text normalisation used on the dataset's policy descriptions.
def preprocess_text(text):
    """Lower-case ``text`` and strip digits and punctuation.

    Digit runs are removed first, then every character that is neither a
    word character nor whitespace. Whitespace itself is left untouched, so
    removed tokens can leave double spaces behind.
    """
    lowered = text.lower()
    without_digits = re.sub(r'\d+', '', lowered)
    return re.sub(r'[^\w\s]', '', without_digits)

# Overwrite the policy_text column with its normalised form (lower-cased,
# digits and punctuation removed) for every row.
data['policy_text'] = data['policy_text'].apply(preprocess_text)

In [None]:
# Encode the category names as integer ids in a new 'category_encoded' column.
label_encoder = LabelEncoder()
data['category_encoded'] = label_encoder.fit_transform(data['category'])

# Split into train and test: 20% of the rows are held out, and the fixed
# random_state keeps the partition the same on every run.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

FOR CITIZEN INPUT

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [None]:
# FOR INPUT TEXT
# Vocabulary cap and fixed sequence length shared by the tokenizer, the
# padding step and the embedding layer.
VOCAB_SIZE = 5000
MAX_SEQUENCE_LENGTH = 100

# The vocabulary is learned from the training split only; the test split is
# merely converted with it.
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(train_data['policy_text'])
X_train = tokenizer.texts_to_sequences(train_data['policy_text'])
X_test = tokenizer.texts_to_sequences(test_data['policy_text'])

# Pad (or truncate) every sequence to the same length, padding at the end.
X_train_padded = pad_sequences(X_train, maxlen=MAX_SEQUENCE_LENGTH, padding='post')
X_test_padded = pad_sequences(X_test, maxlen=MAX_SEQUENCE_LENGTH, padding='post')

# Target (outcome) variables
# NOTE(review): outcome_label holds 0, 0.5 and 1. With a sigmoid output and
# binary cross-entropy the 0.5 rows act as soft targets and presumably can
# never count as correct under the accuracy metric - confirm whether a
# three-class head is what is actually wanted.
y_train = train_data['outcome_label']
y_test = test_data['outcome_label']

# Embedding -> single LSTM -> one sigmoid unit producing a score in [0, 1].
# `input_length` is no longer passed to Embedding: Keras 3 deprecates the
# argument and the layer takes the sequence length from its input.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=128),
    tf.keras.layers.LSTM(128, return_sequences=False),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [None]:
#MODEL TRAINING
# Train for 10 epochs. The held-out test split is passed as validation data,
# so the val_* figures in the log are computed on the test set.
model.fit(X_train_padded, y_train, epochs=10, validation_data=(X_test_padded, y_test))

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 652ms/step - accuracy: 0.3312 - loss: 0.6938 - val_accuracy: 0.3100 - val_loss: 0.6957
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 332ms/step - accuracy: 0.3260 - loss: 0.6952 - val_accuracy: 0.3100 - val_loss: 0.6932
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 466ms/step - accuracy: 0.3420 - loss: 0.6937 - val_accuracy: 0.3400 - val_loss: 0.6931
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 158ms/step - accuracy: 0.3185 - loss: 0.6934 - val_accuracy: 0.3100 - val_loss: 0.6932
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 157ms/step - accuracy: 0.3209 - loss: 0.6932 - val_accuracy: 0.3100 - val_loss: 0.6934
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 220ms/step - accuracy: 0.3341 - loss: 0.6932 - val_accuracy: 0.3400 - val_loss: 0.6931
Epoch 7/10
[1m25/25[0m 

<keras.src.callbacks.history.History at 0x7c83b3be9bd0>

In [None]:
!pip install tensorflow
import tensorflow as tf



In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re

# Static, hand-written responses for selected policies.
# Each key is a policy phrase; each value holds the fixed "Pros", "Cons" and
# "Suggestions" lists returned when that phrase is recognised.
static_policy_responses = {
    "limited free access of expensive medicines for rural support should be provided": {
        "Pros": [
            "Increases access to essential medicines for underserved rural populations.",
            "Reduces financial burden on families in rural areas.",
        ],
        "Cons": [
            "May lead to higher government spending or budget deficits.",
            "Limited availability could lead to unequal distribution.",
        ],
        "Suggestions": [
            "Ensure equitable distribution across all rural areas.",
            "Consider expanding to cover more essential medicines.",
        ],
    },
    "universal healthcare coverage for low-income families": {
        "Pros": [
            "Provides access to essential healthcare services for underserved populations.",
            "Reduces financial strain on low-income households due to medical costs.",
        ],
        "Cons": [
            "Could lead to higher taxes or government spending to fund the initiative.",
            "May strain healthcare resources, leading to longer wait times or lower service quality.",
        ],
        "Suggestions": [
            "Ensure a sustainable funding model to avoid long-term fiscal strain.",
            "Consider scaling the program based on available healthcare infrastructure to prevent resource shortages.",
        ],
    },
}


def preprocess_input_text(input_text):
    """Normalise free-form citizen input before matching and tokenising.

    The text is lower-cased, then digit runs are deleted, then every
    character that is neither a word character nor whitespace is deleted.
    """
    cleaned = input_text.lower()
    # Applied in this order: digits first, punctuation second.
    for pattern in (r'\d+', r'[^\w\s]'):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned


def generate_suggestions(prediction, input_text):
    """Return the canned suggestions that go with a prediction score.

    Scores of 0.4 or more get the "keep and expand" suggestions; anything
    else gets the "reconsider the budget" suggestions. ``input_text`` is
    accepted but not used.
    """
    supportive = [
        "Ensure sustained funding to support the policy for the long-term.",
        "Expand the policy to cover more rural areas with inadequate healthcare access."
    ]
    cautionary = [
        "Reevaluate the budget allocation to avoid overburdening government resources.",
        "Consider alternative ways to provide support, such as subsidizing medicines instead of full coverage."
    ]
    return supportive if prediction >= 0.4 else cautionary

#DYNAMIC
def generate_pros_cons(input_text, prediction, positive_threshold=0.4):
    """Derive pros and cons from keywords in the input and the model score.

    The highest-weighted terms of ``input_text`` (up to seven, English stop
    words excluded) are compared against fixed positive and negative keyword
    lists; each hit adds one templated line. A closing "overall" line is
    then added to the pros or the cons depending on ``prediction``, and the
    opposite list receives a fallback line if it would otherwise be empty.

    Args:
        input_text: The citizen's text.
        prediction: Model score for the text.
        positive_threshold: Scores at or above this value count as a
            positive overall impact. The default of 0.4 is the same cut-off
            used by ``generate_suggestions`` and ``generate_outcome_report``,
            so the overall line cannot contradict the reported outcome.

    Returns:
        Tuple ``(pros, cons)`` of lists of strings; neither list is empty.
    """
    positive_keywords = ['access', 'improve', 'benefit', 'efficient', 'affordable', 'quality', 'expansion', 'support']
    negative_keywords = ['cost', 'expensive', 'limit', 'restrict', 'burden', 'complex', 'inequality', 'delay']

    try:
        vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = vectorizer.fit_transform([input_text])
    except ValueError:
        # The vectorizer raises ValueError when no term survives (blank or
        # stop-word-only input); fall back to having no keywords at all.
        important_words = []
    else:
        feature_names = vectorizer.get_feature_names_out()
        # Indices of the terms ordered from highest to lowest weight.
        sorted_indices = np.argsort(tfidf_matrix.toarray()).flatten()[::-1]
        important_words = [feature_names[i] for i in sorted_indices[:7]]

    pros = []
    cons = []

    for word in important_words:
        if word in positive_keywords:
            pros.append(f"May {word} healthcare services")
        elif word in negative_keywords:
            cons.append(f"Could {word} certain aspects of healthcare")

    if prediction >= positive_threshold:
        pros.append("Overall, the policy is likely to have a positive impact on healthcare delivery.")
        if len(cons) == 0:
            cons.append("Potential for minor drawbacks or inefficiencies despite the positive impact.")
    else:
        cons.append("Overall, the policy may negatively impact healthcare, introducing challenges.")
        if len(pros) == 0:
            pros.append("However, certain aspects could still bring limited benefits to healthcare.")

    return pros, cons


def generate_outcome_report(input_text, model, tokenizer):
    """Produce the outcome report for one piece of citizen input.

    If the normalised input contains one of the phrases in
    ``static_policy_responses``, the hand-written response for that phrase
    is returned with a fixed score of 0.5. Otherwise the text is tokenised,
    padded to 100 tokens and scored by ``model``; scores of 0.4 or more are
    reported as "Positive Impact", lower ones as "Negative Impact".

    Args:
        input_text: Raw text entered by the citizen.
        model: Model whose ``predict`` output is indexed as ``[0][0]`` to
            obtain the score.
        tokenizer: Tokenizer providing ``texts_to_sequences``.

    Returns:
        Dict with the keys "Outcome", "Prediction Score", "Pros", "Cons"
        and "Suggestions".
    """
    processed_text = preprocess_input_text(input_text)

    for policy, responses in static_policy_responses.items():
        # Normalise the key the same way as the input. Otherwise a key that
        # contains punctuation (e.g. "low-income") can never be found in
        # text that has had its punctuation stripped.
        if preprocess_input_text(policy) in processed_text:

            return {
                "Outcome": "Static Outcome for Prototype",
                "Prediction Score": 0.5,
                "Pros": responses["Pros"],
                "Cons": responses["Cons"],
                "Suggestions": responses["Suggestions"]
            }

    # No static match: run the text through the model.
    tokenized_text = tokenizer.texts_to_sequences([processed_text])
    padded_text = pad_sequences(tokenized_text, maxlen=100, padding='post')

    # Prediction: single input, single output value.
    prediction = model.predict(padded_text)[0][0]

    if prediction >= 0.4:
        outcome = "Positive Impact"
    else:
        outcome = "Negative Impact"

    # The keyword analysis works on the raw input, not the normalised text.
    pros, cons = generate_pros_cons(input_text, prediction)
    suggestions = generate_suggestions(prediction, input_text)

    # Return; the score is converted to a plain Python float.
    return {
        "Outcome": outcome,
        "Prediction Score": float(prediction),
        "Pros": pros,
        "Cons": cons,
        "Suggestions": suggestions
    }

# Example: run one citizen input through the full report pipeline.
citizen_input = "A policy for limited free access of expensive medicines for rural support"
report = generate_outcome_report(citizen_input, model, tokenizer)

# Headline figures first, then each list section under its own heading.
print(f"Outcome: {report['Outcome']}")
print(f"Prediction Score: {report['Prediction Score']:.2f}")
for section in ("Pros", "Cons", "Suggestions"):
    print(f"\n{section}:")
    print("\n".join(report[section]))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
Outcome: Positive Impact
Prediction Score: 0.50

Pros:
May support healthcare services
Overall, the policy is likely to have a positive impact on healthcare delivery.

Cons:
Could expensive certain aspects of healthcare

Suggestions:
Ensure sustained funding to support the policy for the long-term.
Expand the policy to cover more rural areas with inadequate healthcare access.


In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.114.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from g

In [None]:
import gradio as gr

In [None]:

def policy_feedback(input_text):
    """Format the outcome report for ``input_text`` as one plain-text block.

    Uses the notebook-level ``model`` and ``tokenizer``. The result starts
    with the outcome and the score (two decimals), followed by the "Pros",
    "Cons" and "Suggestions" sections, each separated by a blank line.
    """
    report = generate_outcome_report(input_text, model, tokenizer)

    # One "<heading>:" block per section, items one per line.
    sections = "\n\n".join(
        f"{heading}:\n" + "\n".join(report[heading])
        for heading in ("Pros", "Cons", "Suggestions")
    )

    return (
        f"Outcome: {report['Outcome']}\n"
        f"Prediction Score: {report['Prediction Score']:.2f}\n\n"
        + sections
    )

# Gradio Interface
# One free-text input wired to policy_feedback; the formatted report comes
# back as plain text.
iface = gr.Interface(
    fn=policy_feedback,
    inputs="text",
    outputs="text",
    title="Citizen Input for Healthcare Policies",
    description="This tool generates an outcome report for healthcare policies based on citizen input, providing pros, cons, and suggestions."
)

# Start the web UI with default launch settings.
iface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://00093e0f95c7209f3b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


