In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

In [2]:
# Load the labelled training essays; the preview in the next cell shows the
# id, prompt_id, text and generated columns.
df=pd.read_csv("train_essays.csv")

In [3]:
# Preview the training frame (the notebook renders a truncated first/last-rows view).
df

Unnamed: 0,id,prompt_id,text,generated
0,0059830c,0,Cars. Cars have been around since they became ...,0
1,005db917,0,Transportation is a large necessity in most co...,0
2,008f63e3,0,"""America's love affair with it's vehicles seem...",0
3,00940276,0,How often do you ride in a car? Do you drive a...,0
4,00c39458,0,Cars are a wonderful thing. They are perhaps o...,0
...,...,...,...,...
1373,fe6ff9a5,1,There has been a fuss about the Elector Colleg...,0
1374,ff669174,0,Limiting car usage has many advantages. Such a...,0
1375,ffa247e0,0,There's a new trend that has been developing f...,0
1376,ffc237e9,0,As we all know cars are a big part of our soci...,0


In [4]:
# generated - Whether the essay was written by a student (0) or generated by an LLM (1)

In [37]:
# Drop the `id` column. Re-assigning with errors="ignore" (rather than
# inplace=True) keeps this cell safe to re-run: a second execution no longer
# raises KeyError because the column is already gone.
df = df.drop(columns="id", errors="ignore")

In [5]:
# Number of essays written for each prompt.
df['prompt_id'].value_counts()

0    708
1    670
Name: prompt_id, dtype: int64

In [6]:
# Count the distinct essay texts; this equals the row count when no essay is duplicated.
df['text'].nunique()

1378

In [7]:
# Class balance of the label: 0 = written by a student, 1 = generated by an LLM.
df['generated'].value_counts()

0    1375
1       3
Name: generated, dtype: int64

In [8]:
## Total number of characters across all essays.
## len() of a string counts characters, not words, and despite its name
## `avg_length` holds the grand total at this point, not the mean.

avg_length = sum(len(essay) for essay in df['text'])
avg_length

4366952

In [9]:
# Mean essay length in characters, computed directly from the data.
# The cell neither reads nor rescales a running total held in `avg_length`,
# so executing it more than once always yields the same value.
avg_length = sum(len(essay) for essay in df['text']) / len(df['text'])
avg_length

3169.0507982583454

In [10]:
# Show the full text of the essay stored under row label 0.
df.loc[0, 'text']

'Cars. Cars have been around since they became famous in the 1900s, when Henry Ford created and built the first ModelT. Cars have played a major role in our every day lives since then. But now, people are starting to question if limiting car usage would be a good thing. To me, limiting the use of cars might be a good thing to do.\n\nIn like matter of this, article, "In German Suburb, Life Goes On Without Cars," by Elizabeth Rosenthal states, how automobiles are the linchpin of suburbs, where middle class families from either Shanghai or Chicago tend to make their homes. Experts say how this is a huge impediment to current efforts to reduce greenhouse gas emissions from tailpipe. Passenger cars are responsible for 12 percent of greenhouse gas emissions in Europe...and up to 50 percent in some carintensive areas in the United States. Cars are the main reason for the greenhouse gas emissions because of a lot of people driving them around all the time getting where they need to go. Article

In [11]:
## Baseline Naive Bayes model trained on `df`, the essays loaded from train_essays.csv

# Stratify on the label: only a handful of essays are LLM-generated, so a plain
# random split can leave the train or dev side without a single positive example.
train_df, dev_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df['generated']
)

# Build a vocabulary
vectorizer = CountVectorizer(min_df=5)  # ignore terms found in fewer than five training essays
X_train = vectorizer.fit_transform(train_df['text'])
vocabulary = vectorizer.get_feature_names_out()

# Reverse index: word -> column index in X_train
reverse_index = {word: idx for idx, word in enumerate(vocabulary)}

# Document counts, overall and per class
total_documents = len(train_df)
class_counts = train_df['generated'].value_counts()

# Average count of each word per training essay. CountVectorizer stores raw
# counts, so a value can exceed 1 and is not strictly a probability P[word].
word_occurrence_prob = np.array(X_train.sum(axis=0) / total_documents).flatten()

# Same statistic restricted to the LLM-written essays.
is_llm = (train_df['generated'] == 1).to_numpy()  # plain boolean array for row-indexing the sparse matrix
llm_documents = train_df[is_llm]
n_llm_documents = int(class_counts.get(1, 0))  # .get(): class_counts[1] raises KeyError when class 1 is absent
if n_llm_documents > 0:
    llm_word_occurrence_prob = np.array(X_train[is_llm].sum(axis=0) / n_llm_documents).flatten()
else:
    llm_word_occurrence_prob = np.zeros(len(vocabulary))

# Vectorise the development set with the vocabulary learned on the training set
X_dev = vectorizer.transform(dev_df['text'])

# Experiments with smoothing (alpha=1.0 is also the MultinomialNB default).
# NOTE: the label is heavily imbalanced (see the value_counts of `generated`
# earlier in the notebook), so always predicting 0 already scores close to 100%
# accuracy; these numbers say little about how well LLM essays are detected.
smoothing_values = [1.0, 0.5, 0.1, 0.01]
best_alpha, best_accuracy, best_model = None, -1.0, None
for alpha in smoothing_values:
    candidate = MultinomialNB(alpha=alpha)
    candidate.fit(X_train, train_df['generated'])
    predictions = candidate.predict(X_dev)
    accuracy = accuracy_score(dev_df['generated'], predictions)
    print(f'Accuracy with smoothing alpha={alpha}: {accuracy}')
    # Strict '>' keeps the earliest (most strongly smoothed) alpha on ties.
    if accuracy > best_accuracy:
        best_alpha, best_accuracy, best_model = alpha, accuracy, candidate

# Keep the selected model under the name `model` for the cells that follow,
# instead of whichever alpha happened to be tried last.
model = best_model
predictions = model.predict(X_dev)
accuracy = best_accuracy

# Derive Top 10 words that predict each class.
# feature_log_prob_[c] on its own ranks words by how frequent they are inside
# class c, which surfaces the same common words for both classes. The difference
# of the two rows is the log-likelihood ratio: large positive values favour the
# LLM class (row 1), large negative values favour the human class (row 0).
log_ratio = model.feature_log_prob_[1] - model.feature_log_prob_[0]
ranked = log_ratio.argsort()
top_words_llm = ranked[-10:][::-1]
top_words_human = ranked[:10]

top_words_llm = [vocabulary[idx] for idx in top_words_llm]
top_words_human = [vocabulary[idx] for idx in top_words_human]


Accuracy with smoothing alpha=1.0: 0.9963768115942029
Accuracy with smoothing alpha=0.5: 0.9963768115942029
Accuracy with smoothing alpha=0.1: 0.9963768115942029
Accuracy with smoothing alpha=0.01: 0.9963768115942029


In [12]:
# Load the test essays; the preview in the next cell shows id, prompt_id and
# text only (there is no `generated` label column).
df_test=pd.read_csv("test_essays.csv")

In [13]:
# Preview the test essays.
df_test

Unnamed: 0,id,prompt_id,text
0,0000aaaa,2,Aaa bbb ccc.
1,1111bbbb,3,Bbb ccc ddd.
2,2222cccc,4,CCC ddd eee.


In [14]:

# Predict a label for every test essay with the fitted vectorizer and model.
# The test file carries no `generated` column, so accuracy cannot be measured here.
test_df = df_test.copy()  # reuse the frame already read from test_essays.csv instead of loading it twice
X_test = vectorizer.transform(test_df['text'])
predictions_test = model.predict(X_test)

In [15]:
# Predicted labels for the test essays (0 = student-written, 1 = LLM-generated).
predictions_test

array([0, 0, 0], dtype=int64)

In [16]:
# Load the prompt metadata; the preview in the next cell shows prompt_id,
# prompt_name, instructions and source_text.
df2=pd.read_csv("train_prompts.csv")

In [17]:
# Preview the prompts table.
df2

Unnamed: 0,prompt_id,prompt_name,instructions,source_text
0,0,Car-free cities,Write an explanatory essay to inform fellow ci...,"# In German Suburb, Life Goes On Without Cars ..."
1,1,Does the electoral college work?,Write a letter to your state senator in which ...,# What Is the Electoral College? by the Office...


In [33]:
# Full instruction text of the prompt stored under row label 1.
df2.loc[1, 'instructions']

'Write a letter to your state senator in which you argue in favor of keeping the Electoral College or changing to election by popular vote for the president of the United States. Use the information from the texts in your essay. Manage your time carefully so that you can read the passages; plan your response; write your response; and revise and edit your response. Be sure to include a claim; address counterclaims; use evidence from multiple sources; and avoid overly relying on one source. Your response should be in the form of a multiparagraph essay. Write your response in the space provided.'

Creating the artificial dataset

In [46]:
l1=[0,0,0,0,1,0,0,1,1,1] ## prompt_id per artificial essay (0 = car-free cities, 1 = electoral college); presumably aligned index-by-index with l2 - TODO confirm
l3=[1,1,1,1,1,1,1,1,1,1] ## `generated` label per artificial essay: every entry is 1 (LLM-generated)
l2=["Limiting Car Usage: A Comprehensive Exploration of Environmental, Health, Economic, and Urban Implications.The global discourse on sustainability has reached a critical juncture, urging citizens to reevaluate their lifestyles and habits, particularly in the context of transportation. The pervasive reliance on automobiles has come under scrutiny, and as responsible citizens, it is imperative to understand the advantages associated with limiting car usage. This essay endeavors to elucidate the multifaceted benefits of such limitations, drawing upon a diverse range of sources to provide a comprehensive view of the subject.At the forefront of the advantages lies the environmental impact of reducing car usage. The conventional combustion engines in automobiles contribute significantly to air pollution and greenhouse gas emissions. According to data from the Environmental Protection Agency (EPA), the transportation sector, with cars being a substantial contributor, is a major source of greenhouse gases. By limiting car usage, particularly those powered by fossil fuels, we can curtail these emissions, thereby contributing to improved air quality and a reduction in the overall carbon footprint. The mitigation of environmental degradation is paramount in the face of escalating climate change concerns, making the limitation of car usage a crucial step towards a sustainable future.The ramifications of reducing car usage extend beyond the environmental realm to encompass substantial benefits for public health. Numerous studies, including research conducted by the World Health Organization (WHO), emphasize the detrimental health effects of air pollution, linking it to a spectrum of illnesses, from respiratory diseases to cardiovascular problems. A reduction in the number of cars on the road directly correlates with a decrease in harmful pollutants, translating to improved air quality and a subsequent reduction in health-related issues. 
Therefore, limiting car usage emerges not only as an environmental necessity but as a critical intervention for safeguarding public health.Moreover, the economic advantages of limiting car usage are noteworthy. The traditional approach to accommodating a burgeoning number of vehicles places a considerable financial burden on governments and local authorities for road maintenance and construction. A report from the International Transport Forum (ITF) underscores the economic benefits of reducing car dependency, citing potential cost savings in infrastructure upkeep. Beyond cost savings, promoting alternative modes of transportation, such as cycling and public transit, can stimulate economic activities in related industries, generating job opportunities and fostering economic growth.The significance of limiting car usage is further underscored when viewed through the lens of urban planning and development. Many cities grapple with challenges like traffic congestion, inadequate parking spaces, and the strain on existing infrastructure due to the proliferation of automobiles. The Urban Land Institute (ULI) advocates for sustainable urban planning that prioritizes mixed-use developments, reducing the need for extensive car travel and fostering a more walkable and livable urban environment. Limiting car usage aligns with the broader goals of creating sustainable, resilient, and efficient urban spaces that cater to the needs of diverse populations.While the advantages of limiting car usage are evident, it is essential to address potential concerns and challenges associated with such a transition. One common apprehension revolves around the convenience and accessibility of alternative modes of transportation. In many regions, existing public transit systems may be inadequate or unreliable, posing a significant challenge for individuals to shift away from private car usage. 
Addressing this concern requires strategic investments in public transportation infrastructure, including the expansion of routes, improved frequency, and the integration of emerging technologies to enhance efficiency.Additionally, the successful transition to limited car usage necessitates a shift in societal attitudes and behaviors. Car ownership has long been synonymous with status and convenience, and convincing individuals to embrace alternative modes of transportation requires a concerted effort in promoting the benefits of a car-free lifestyle. Public awareness campaigns, incentives for sustainable transportation choices, and educational programs can play a pivotal role in changing societal norms and encouraging a collective commitment to reducing car dependency.In conclusion, limiting car usage is a multifaceted solution with far-reaching benefits for the environment, public health, the economy, and urban development. Drawing on information from diverse sources, including environmental agencies, health organizations, economic studies, and urban planning experts, it becomes evident that a transition away from excessive car usage is not only desirable but also imperative for a sustainable future. As responsible citizens, it is incumbent upon us to advocate for and participate in initiatives that promote alternative transportation modes, reduce our carbon footprint, and pave the way for a more harmonious coexistence with the environment. Through collective efforts, we can embark on a journey toward a future where our cities are less congested, our air is cleaner, and our communities thrive in a sustainable and resilient manner.","Limiting Car Usage for a Sustainable FutureIn recent years, the global community has been grappling with the urgent need to address the environmental challenges posed by excessive car usage. As citizens, it is crucial for us to comprehend the multifaceted advantages associated with limiting our reliance on cars. 
This essay aims to elucidate the various benefits that come with reducing car usage, shedding light on environmental, economic, and societal considerations. By delving into information from diverse sources, we can better appreciate the potential positive impacts of adopting alternative transportation methods.To begin with, one of the primary advantages of limiting car usage is the positive impact on the environment. The omnipresence of cars powered by fossil fuels has significantly contributed to air pollution and greenhouse gas emissions. According to a report by the World Health Organization (WHO), air pollution from vehicle emissions is a major contributor to respiratory diseases and premature deaths worldwide. By embracing alternative modes of transportation such as public transit, cycling, or walking, we can mitigate the environmental degradation associated with car usage. The reduction in emissions not only improves air quality but also contributes to the global effort to combat climate change, making it a vital step toward a more sustainable future.Moreover, limiting car usage can have substantial economic benefits. A study conducted by the International Energy Agency (IEA) highlights the economic costs associated with the reliance on cars, including congestion-related expenses, infrastructure maintenance, and healthcare costs due to air pollution. By reducing the number of cars on the road, cities can alleviate traffic congestion, leading to time and fuel savings for commuters. Additionally, investing in public transportation infrastructure creates jobs and stimulates economic growth. A shift towards more sustainable transportation options not only improves the overall economic efficiency of a region but also fosters a healthier and more vibrant community.Beyond the environmental and economic advantages, limiting car usage can positively impact societal well-being. 
The American Journal of Public Health published a study emphasizing the link between car dependence and sedentary lifestyles, which contribute to health issues such as obesity and cardiovascular diseases. Encouraging alternative modes of transportation, such as walking and cycling, promotes physical activity and healthier lifestyles. Moreover, a reduction in traffic congestion enhances road safety, reducing the number of accidents and associated injuries. By fostering a safer and healthier environment, the community experiences improved quality of life, with individuals enjoying the benefits of cleaner air, increased physical activity, and enhanced overall well-being.Critics may argue that limiting car usage poses challenges in terms of convenience and flexibility. However, advancements in technology and urban planning offer viable solutions to address these concerns. The integration of smart transportation systems, ride-sharing platforms, and improved public transit options can provide citizens with convenient and flexible alternatives to car ownership. Embracing these innovations not only addresses concerns about convenience but also contributes to the development of more sustainable and efficient urban environments.In conclusion, the advantages of limiting car usage are vast and extend across environmental, economic, and societal dimensions. By reducing our dependence on cars, we can contribute to a healthier planet, a more robust economy, and improved overall well-being for society. The evidence from various sources underscores the urgency of transitioning towards sustainable transportation methods. As responsible citizens, it is imperative that we recognize the impact of our choices and actively participate in creating a future where the benefits of limiting car usage are realized on a global scale.","In our modern society, the reliance on cars has become deeply ingrained, yet it’s crucial to consider the benefits of limiting their usage. 
This perspective isn’t merely rooted in environmental concerns but extends to multifaceted advantages that span social, economic, and personal spheres.Firstly, curbing car usage offers a substantial opportunity to alleviate the strain on our environment. Transportation is a significant contributor to greenhouse gas emissions, with cars being a prime culprit. By reducing the number of cars on the roads, we can effectively lower carbon emissions, combatting climate change and its far-reaching impacts. According to multiple environmental studies, a decrease in car usage directly correlates with reduced air pollution and a healthier atmosphere for both humans and ecosystems. This is a compelling reason to promote alternative transportation methods like walking, cycling, and utilizing public transportation systems.Furthermore, limiting car usage can profoundly impact urban infrastructure and community well-being. Heavy reliance on cars congests roads, leading to increased traffic, longer commute times, and heightened stress levels among commuters. Research has shown that cities designed around automobiles tend to have lower walkability scores and reduced social interactions among residents. Conversely, when cities prioritize pedestrian-friendly spaces and efficient public transportation, communities thrive. The allocation of resources towards creating walkable neighborhoods not only encourages physical activity but also fosters a sense of belonging and community engagement.Economically, reducing car dependence can yield significant financial benefits at both individual and societal levels. The costs associated with car ownership – maintenance, fuel, insurance – can burden households. By opting for alternative modes of transportation, individuals can save on these expenses while also reducing the strain on public infrastructure. Moreover, cities investing in robust public transportation systems often see a boost in local economies. 
Efficient public transit networks attract businesses, create job opportunities, and enhance accessibility for all residents, irrespective of their access to private vehicles.Another crucial advantage of limiting car usage lies in improved public health outcomes. Sedentary lifestyles associated with excessive car use contribute to health issues such as obesity, cardiovascular diseases, and mental health disorders. Encouraging walking or cycling as viable alternatives not only promotes physical fitness but also aids in reducing the prevalence of chronic health conditions. Studies have consistently highlighted the positive correlation between active transportation and improved overall health.In conclusion, the advantages of limiting car usage span various domains, from environmental sustainability and community well-being to economic prosperity and public health. Embracing alternative transportation methods and restructuring urban spaces to reduce dependency on cars is a holistic approach toward building more resilient and vibrant societies. As we navigate the challenges of the modern world, rethinking our reliance on automobiles stands as a crucial step towards a more sustainable and inclusive future.","Limiting car usage is a topic of increasing importance in contemporary society, as concerns over environmental sustainability, traffic congestion, and public health continue to mount. In this essay, I will explore the advantages of restricting car usage, drawing on information from various sources to present a comprehensive understanding of the issue. From reducing air pollution and mitigating climate change to improving public health and fostering community engagement, limiting car usage can yield a myriad of benefits that extend far beyond individual preferences.One primary advantage of limiting car usage is the positive impact on the environment. 
According to the Environmental Protection Agency (EPA), cars are a significant contributor to air pollution, emitting pollutants such as carbon monoxide, nitrogen oxides, and particulate matter. These pollutants not only degrade air quality but also contribute to the greenhouse effect, exacerbating climate change. By reducing the number of cars on the road through measures like encouraging public transportation, carpooling, and implementing pedestrian-friendly infrastructure, we can significantly decrease the emission of harmful pollutants and alleviate the environmental burden.Furthermore, limiting car usage can have profound effects on public health. The World Health Organization (WHO) has identified air pollution as a major risk factor for various respiratory and cardiovascular diseases. By curbing car usage, especially in densely populated urban areas, we can reduce the concentration of pollutants in the air, thereby lowering the incidence of respiratory illnesses and improving overall public health. Additionally, promoting alternative modes of transportation, such as cycling and walking, not only reduces air pollution but also encourages physical activity, addressing the growing concerns related to sedentary lifestyles and obesity.In addition to the environmental and health benefits, limiting car usage can enhance the overall quality of urban life by mitigating traffic congestion. According to a study conducted by the Institute of Transportation Studies, traffic congestion not only results in wasted time and increased fuel consumption but also has economic implications due to the associated costs of delayed travel. Implementing policies that discourage excessive car use and promote alternative transportation methods can alleviate congestion, making cities more efficient, sustainable, and economically viable.Moreover, limiting car usage can contribute to the creation of more vibrant and connected communities. 
In a society where car dependence often leads to isolated lifestyles, promoting alternatives like walking and cycling fosters a sense of community. Local businesses can benefit from increased foot traffic, and public spaces become more conducive to social interactions. This sense of community engagement not only enhances the overall well-being of residents but also creates a more sustainable and resilient social fabric.While the advantages of limiting car usage are substantial, it is essential to acknowledge potential challenges and address them in a balanced manner. One concern often raised is the need for efficient and accessible alternative transportation options. Investing in well-designed public transportation systems, safe cycling infrastructure, and pedestrian-friendly urban planning can address this challenge and make alternative modes of transportation viable and attractive to a broader segment of the population.In conclusion, limiting car usage holds immense potential for creating a more sustainable, healthier, and connected society. By reducing environmental pollution, improving public health, alleviating traffic congestion, and fostering community engagement, the benefits of embracing alternative transportation methods extend far beyond individual preferences. It is incumbent upon us as responsible citizens to advocate for and participate in initiatives that promote a shift away from excessive car reliance, paving the way for a more harmonious and sustainable future.", "I hope this letter finds you well. As a concerned citizen deeply invested in the democratic processes that shape our nation, I am writing to express my views on the Electoral College and its role in electing the President of the United States. 
The ongoing debate between maintaining the current system and transitioning to a popular vote system has generated significant discourse, and I believe it is crucial for us, as constituents, to engage in this conversation with careful consideration of the arguments on both sides.Proponents of the Electoral College argue that it serves as a vital mechanism to ensure the representation of diverse voices across the nation. One fundamental aspect of the Electoral College is its allocation of electoral votes based on each state's population size, adding a layer of proportional representation to the election process. This perspective contends that without the Electoral College, candidates might exclusively focus on densely populated urban areas, neglecting the concerns and priorities of citizens in less populated regions.Moreover, the Electoral College is viewed by some as a safeguard against potential election fraud. In a system where a candidate must secure a majority of electoral votes from various states, the risk of manipulating the outcome through fraud or interference is perceived to be lower than in a direct popular vote scenario. This assertion is underscored by historical examples where localized fraud, rather than a widespread national issue, has been more easily manageable.However, critics of the Electoral College argue that it can lead to situations where the will of the majority is not accurately reflected in the election outcome. The winner-takes-all system, in which the candidate who secures the popular vote in a state receives all of its electoral votes, has been a point of contention. This mechanism can result in a candidate winning the presidency without winning the overall popular vote, as witnessed in some past elections. 
Detractors argue that this outcome undermines the democratic principle of one person, one vote, and can lead to a sense of disenfranchisement among voters.Furthermore, critics point to the potential for swing states to disproportionately influence election outcomes, leading candidates to focus disproportionately on a select few states to the neglect of others. This, they argue, can create an imbalance in policy priorities, as candidates tailor their platforms to appeal to the specific concerns of voters in these battleground states, potentially overlooking the needs of citizens in non-swing states.The push for a popular vote system is grounded in the belief that it would ensure a more direct and straightforward representation of the collective will of the American people. Advocates for this change argue that it aligns with the democratic principle of equal representation, with every vote counting equally, regardless of the state in which it is cast. The direct election of the president through a popular vote is seen as a means of fostering a more inclusive and participatory democracy.Additionally, proponents of a popular vote system contend that it would encourage candidates to campaign and formulate policies with a national perspective, rather than focusing on the preferences of specific states or demographics. This shift, they argue, would lead to a more holistic and comprehensive approach to governance, as candidates seek to appeal to a broad and diverse electorate.In conclusion, the debate over whether to retain the Electoral College or transition to a popular vote system is a complex and nuanced one. The Electoral College has been praised for its role in providing proportional representation and safeguarding against potential fraud, while critics argue that it can lead to outcomes that do not accurately reflect the majority will. 
As we navigate this discussion, it is imperative to weigh these arguments carefully, considering the impact of our electoral system on the principles of representation and fairness that underpin our democracy.I encourage you, as my representative, to carefully consider the views of your constituents on this matter. The decision regarding the electoral process is one that shapes the very foundation of our democracy, and I trust that you will advocate for a system that best aligns with the values and aspirations of the American people. Thank you for your attention to this crucial issue, and I look forward to hearing about your stance on this matter.","In today's world, cars have become an indispensable part of our lives, providing us with convenience and mobility. However, the excessive use of cars has also brought about a range of challenges, including environmental degradation, traffic congestion, and health problems. Limiting car usage offers a multitude of advantages that can enhance our well-being and create a more sustainable future.Firstly, reducing car usage can significantly improve air quality. Cars emit harmful pollutants such as carbon monoxide, nitrogen oxides, and particulate matter, which contribute to air pollution and its associated health risks. By limiting car usage, we can reduce these emissions, leading to cleaner air and a healthier environment.Secondly, limiting car usage can alleviate traffic congestion, which is a major problem in urban areas worldwide. Traffic congestion wastes time, increases fuel consumption, and contributes to stress and frustration. By shifting towards alternative modes of transportation such as public transport, cycling, or walking, we can reduce the number of cars on the road, easing congestion and improving traffic flow.Thirdly, limiting car usage can promote physical activity and overall health. 
A sedentary lifestyle, often associated with excessive car usage, contributes to various health problems, including obesity, heart disease, and diabetes. By embracing alternative modes of transportation, we can incorporate physical activity into our daily routines, leading to improved fitness, reduced risk of chronic diseases, and enhanced mental well-being.Fourthly, limiting car usage can reduce our reliance on fossil fuels, which are a finite resource and major contributor to climate change. By using cars less frequently, we can conserve fossil fuels and reduce our carbon footprint, contributing to a more sustainable future.Fifthly, limiting car usage can create more vibrant and livable communities. Car-centric urban planning has often led to sprawling suburbs, fragmented communities, and a loss of public spaces. By reducing car dependency, we can create more walkable and bike-friendly communities, fostering social interaction, enhancing community cohesion, and revitalizing public spaces.In conclusion, limiting car usage offers a plethora of advantages that can improve our environment, enhance our health, and create more sustainable and livable communities. By embracing alternative modes of transportation, we can reduce our reliance on cars and reap the numerous benefits that come with a more sustainable approach to transportation","Our reliance on cars has become increasingly evident in todays world, shaping our daily commutes, leisure activities, and overall way of life. While cars have undoubtedly revolutionized transportation, their excessive use has brought about a multitude of challenges, prompting the need for a shift towards more sustainable and healthy modes of travel.Limiting car usage offers a plethora of advantages that can significantly enhance our lives and the environment around us. By reducing our dependence on cars, we can effectively tackle air pollution, a major environmental concern that has detrimental effects on human health. 
Cars emit harmful greenhouse gases, including carbon dioxide, nitrogen oxides, and particulate matter, which contribute to smog, acid rain, and climate change. These pollutants are linked to respiratory illnesses, heart disease, and even premature death. By opting for alternative transportation modes such as public transit, cycling, or walking, we can significantly reduce our carbon footprint and create a cleaner, healthier environment for all.In addition to environmental benefits, limiting car usage also promotes a healthier lifestyle and reduces stress levels. Overreliance on cars has led to a sedentary lifestyle, with individuals spending excessive amounts of time sitting in traffic or behind the wheel. This lack of physical activity increases the risk of obesity, cardiovascular diseases, and other chronic health conditions. By embracing alternative modes of transportation, we can incorporate physical activity into our daily routines, promoting overall well-being and reducing the burden on healthcare systems. Moreover, the stress associated with driving, such as traffic congestion and road rage, can significantly impact mental health. By choosing alternative transportation options, we can reduce our exposure to these stressors and create a more peaceful and enjoyable commute.Limiting car usage can also lead to economic savings for individuals and communities as a whole. The financial burden of car ownership, including fuel costs, maintenance expenses, and insurance premiums, can be substantial. By reducing car usage, individuals can save money on these expenses, freeing up funds for other necessities or personal pursuits. Additionally, communities can benefit from reduced road infrastructure costs, as less car usage translates to less wear and tear on roads and bridges. 
These savings can be redirected towards other essential public services, such as education, healthcare, and infrastructure development.Furthermore, limiting car usage fosters a more vibrant and interconnected community. Cars can often create barriers between people, isolating individuals from their neighborhoods and communities. By embracing alternative transportation modes, we can rediscover the joy of walking, cycling, or using public transit, which allows us to connect with our surroundings and engage with fellow citizens. This increased interaction can lead to stronger social ties, a sense of belonging, and a more vibrant community spirit.In conclusion, limiting car usage presents a myriad of advantages that can enhance our lives and the environment around us. By reducing our reliance on cars, we can combat air pollution, promote healthier lifestyles, reduce stress levels, achieve economic savings, and foster stronger communities. Embracing alternative transportation modes is a crucial step towards creating a more sustainable, healthy, and connected future for all.","I am writing to you today to express my strong support for maintaining the Electoral College system for electing the President of the United States. I believe that the Electoral College is essential to preserving the balance of power between the federal government and the states, and that it ensures that every state, regardless of its size or population, has a voice in the selection of our nation's leader.The Electoral College was established by the Founding Fathers as a compromise between electing the President by a direct popular vote and electing the President by the Congress. The Founding Fathers recognized that a direct popular vote would be unfair to less populous states, as the majority of the population would be concentrated in a few large states. They also recognized that electing the President by Congress would be inefficient and could lead to gridlock. 
The Electoral College struck a balance between these two extremes, ensuring that every state has a voice in the election while also preventing any one state from having too much power.The Electoral College has served our country well for over 200 years. It has helped to ensure that we have a stable and balanced government, and it has prevented the rise of a national tyranny. In recent years, there have been calls to abolish the Electoral College, but I believe that these calls are misguided. The Electoral College is a vital part of our Constitution, and it should not be tampered with.There are several reasons why the Electoral College should be maintained. First, the Electoral College protects the interests of small states. In a direct popular vote, a candidate could win the election by winning the popular vote in just a few large states. This would mean that the voices of small states would be drowned out, and their interests would not be represented in the government. The Electoral College, on the other hand, ensures that every state has a voice in the election, regardless of its size.Second, the Electoral College encourages candidates to campaign in all parts of the country. In a direct popular vote, a candidate could focus on campaigning in a few large states and win the election without ever setting foot in a small state. The Electoral College, on the other hand, forces candidates to campaign in all parts of the country in order to win the election. This helps to ensure that all Americans have a chance to hear from the candidates and to participate in the democratic process.Third, the Electoral College prevents the rise of a national tyranny. In a direct popular vote, a candidate could win the election by winning the popular vote by a very small margin. This would give the candidate a very weak mandate, and it could lead to instability and gridlock. 
The Electoral College, on the other hand, requires a candidate to win a majority of the electoral votes in order to win the election. This helps to ensure that the President has a strong mandate and that the government is stable.I urge you to resist any calls to abolish the Electoral College. The Electoral College is a vital part of our Constitution, and it should not be tampered with.Thank you for your time and consideration.","Subject: Advantages and Considerations for the Electoral College vs. a Popular Vote System.As a concerned citizen invested in the integrity of our electoral process, I write to express my views on the method of electing the President of the United States. The debate between maintaining the Electoral College and transitioning to a popular vote system is one of great importance, impacting the fundamental principles of our democracy. In considering this matter, it becomes evident that while both systems have their merits, the Electoral College remains a crucial cornerstone in preserving the balance of power and representation across our diverse nation.One of the primary arguments in favor of the Electoral College is its role in safeguarding the interests of smaller states. The College ensures that candidates must have a broad base of support across various regions and not simply focus on densely populated urban areas. This system fosters a more balanced representation of the American populace, preventing candidates from solely catering to the needs and demands of a few highly populated states while neglecting the concerns of smaller states.Moreover, the Electoral College has historically provided stability to the election process by reducing the potential for disputes and recounts. 
It offers a clear mechanism for determining the winner, often avoiding prolonged legal battles that could undermine the credibility and legitimacy of the presidency.Critics of the Electoral College advocate for a popular vote system, arguing that it would reflect the will of the majority more accurately. They contend that every vote should hold equal weight and that the current system can lead to instances where the winner of the popular vote loses the election, as evidenced in past elections.However, transitioning to a popular vote system raises its own set of challenges. The sheer logistical demands of conducting a nationwide popular vote would be immense and might pose significant risks in terms of managing and ensuring the accuracy of the count. Moreover, it could potentially exacerbate regional disparities, with candidates focusing primarily on densely populated urban areas to secure votes, inadvertently neglecting the concerns of rural or less populous regions.Additionally, a popular vote system might not necessarily reflect a true majority consensus, as it could result in victories driven by highly populated urban centers, overshadowing the voices and needs of vast rural communities. This imbalance could skew policy priorities and alienate significant portions of the country.In conclusion, while the popular vote system may seem more directly democratic, the Electoral College serves as a safeguard against potential pitfalls such as regional biases and the manipulation of densely populated areas' interests over others. Nevertheless, both systems have their drawbacks, and any changes should be carefully considered, taking into account the need for a fair representation of all citizens and the prevention of undue influence from specific regions.Thank you for considering these perspectives as you deliberate on this crucial matter.","Subject: The Electoral College vs. 
Popular Vote: A Plea for Thoughtful ConsiderationI hope this letter finds you in good health and high spirits. I am writing to express my thoughts on a matter of great importance to our democratic process the method by which we elect the President of the United States. The debate between maintaining the current Electoral College system and transitioning to a popular vote system has been a longstanding one, with passionate arguments on both sides. In this letter, I aim to articulate the reasons for both perspectives, drawing from various sources to present a comprehensive view.The Electoral College, a system established by our Founding Fathers, has been a pillar of the American electoral process for centuries. Advocates argue that it ensures equal representation for all states, preventing densely populated regions from dominating the election and leaving the concerns of smaller states ignored. This safeguard against the tyranny of the majority was designed to preserve the delicate balance between state and federal powers. As Alexander Hamilton eloquently argued in Federalist Paper No. 68, the Electoral College serves as a buffer, providing a layer of insulation against the potential influence of foreign powers and preventing the election from being swayed by uninformed or impulsive popular opinion.However, critics of the Electoral College contend that it has inherent flaws, such as the possibility of electing a candidate who loses the popular vote. They argue that this goes against the democratic principle of one person, one vote.As highlighted by many, the winner-takes-all approach in most states can lead to a situation where candidates focus their campaigns on a handful of battleground states, leaving the concerns of citizens in non-swing states overshadowed. This raises questions about the fairness and inclusivity of the current system.The call for a shift towards a popular vote system is not new, and it has gained momentum in recent years. 
Proponents of this change argue that it would simplify the election process, ensuring that every vote carries equal weight, regardless of the voters location. They point to the fact that direct elections are the norm in most democratic countries around the world, and the United States should not be an exception. According to a study by the National Popular Vote organization, a popular vote system would encourage candidates to campaign nationwide, addressing the concerns of citizens in every state, not just swing states.Despite these arguments, opponents of a popular vote system express concerns about its potential consequences. They argue that such a system might exacerbate regional divides, with candidates focusing solely on populous urban areas and neglecting the unique needs of rural communities. Additionally, the complexity of implementing a popular vote system on a national scale is a logistical challenge that cannot be underestimated. The current state-by-state approach provides a level of clarity and certainty that might be jeopardized by a nationwide popular vote.In evaluating these contrasting viewpoints, it is crucial to consider the historical context and the intended purpose of the Electoral College. The Founding Fathers, with their deep understanding of human nature and history, sought to establish a system that would prevent the concentration of power in the hands of a few. While the Electoral College may seem outdated to some, it reflects a commitment to the principles of federalism and a desire to protect the interests of all states, regardless of their size or population.Nonetheless, the flaws in the current system cannot be ignored, and a thoughtful reevaluation is necessary. A middle ground that addresses the concerns of both sides could involve reforming the Electoral College rather than scrapping it entirely. 
For example, states could reconsider their winner-takes-all approach and adopt proportional allocation of electors based on the popular vote within each state. This would still maintain the essence of the Electoral College while addressing some of the criticisms directed towards it.In conclusion, the debate over the Electoral College versus a popular vote system is a complex and multifaceted one. Both sides present compelling arguments, and any decision should be approached with a deep understanding of the consequences. As you, esteemed Senator, engage in discussions and decisions regarding the electoral process, I implore you to consider the broader implications on our democracy. Perhaps, in finding a solution, we can strike a balance that respects the intentions of our Founding Fathers while addressing the evolving needs and expectations of our diverse and dynamic nation.Thank you for your time, and I trust that you will approach this matter with the wisdom and discernment it deserves."]





len(l2)

10

In [41]:
# Show the original training frame; the `id` column was dropped in an earlier cell.
df

Unnamed: 0,prompt_id,text,generated
0,0,Cars. Cars have been around since they became ...,0
1,0,Transportation is a large necessity in most co...,0
2,0,"""America's love affair with it's vehicles seem...",0
3,0,How often do you ride in a car? Do you drive a...,0
4,0,Cars are a wonderful thing. They are perhaps o...,0
...,...,...,...
1373,1,There has been a fuss about the Elector Colleg...,0
1374,0,Limiting car usage has many advantages. Such a...,0
1375,0,There's a new trend that has been developing f...,0
1376,0,As we all know cars are a big part of our soci...,0


In [43]:
# Put the additional essays into a frame with the same three columns as `df`.
# l1 / l2 / l3 are the prompt ids, essay texts and labels built in earlier cells.
df_new = pd.DataFrame(
    data={
        "prompt_id": l1,
        "text": l2,
        "generated": l3,
    }
)
df_new

Unnamed: 0,prompt_id,text,generated
0,0,Limiting Car Usage: A Comprehensive Exploratio...,1
1,0,Limiting Car Usage for a Sustainable FutureIn ...,1
2,0,"In our modern society, the reliance on cars ha...",1
3,0,Limiting car usage is a topic of increasing im...,1
4,1,I hope this letter finds you well. As a concer...,1
5,0,"In today's world, cars have become an indispen...",1
6,0,Our reliance on cars has become increasingly e...,1
7,1,I am writing to you today to express my strong...,1
8,1,Subject: Advantages and Considerations for the...,1
9,1,Subject: The Electoral College vs. Popular Vot...,1


In [44]:
# Stack the original essays and the added essays row-wise; ignore_index renumbers
# the rows 0..n-1 so the two frames do not end up with duplicate index labels.
final_df = pd.concat(objs=[df, df_new], axis=0, ignore_index=True)


In [45]:
# Show the combined frame (rows of `df` followed by rows of `df_new`).
final_df

Unnamed: 0,prompt_id,text,generated
0,0,Cars. Cars have been around since they became ...,0
1,0,Transportation is a large necessity in most co...,0
2,0,"""America's love affair with it's vehicles seem...",0
3,0,How often do you ride in a car? Do you drive a...,0
4,0,Cars are a wonderful thing. They are perhaps o...,0
...,...,...,...
1383,0,"In today's world, cars have become an indispen...",1
1384,0,Our reliance on cars has become increasingly e...,1
1385,1,I am writing to you today to express my strong...,1
1386,1,Subject: Advantages and Considerations for the...,1


In [49]:
## Shuffling the dataset
# A fixed seed makes the row order repeatable under Restart & Run All. Without it the
# rows fed to train_test_split differ on every run, so the reported accuracies cannot
# be reproduced even though the split itself is seeded.
# NOTE: this cell reassigns final_df, so running it twice in one session permutes the
# rows a second time; re-run from the concat cell to get back to the canonical order.
final_df = final_df.sample(frac=1, random_state=42).reset_index(drop=True)
final_df

Unnamed: 0,prompt_id,text,generated
0,0,Many cities are experiencing perhaps the bigge...,0
1,1,The Electoral College is a process that has be...,0
2,0,"In our society today, limiting car usage would...",0
3,1,The president of the united states is one of t...,0
4,0,"Highways, roads, streets they are everywhere. ...",0
...,...,...,...
1383,0,"In the United States, people are very busy. Wi...",0
1384,0,Almost every American has a car. Just go outsi...,0
1385,0,"All around the world, cars have influence our ...",0
1386,0,Cars are one of the main causes of air polluti...,0


In [51]:
## Train and evaluate a Multinomial Naive Bayes classifier on the combined dataset
## (final_df = original essays + the added LLM essays)

# Very few rows are labelled generated == 1, so stratify the split to guarantee that
# both the training and the development set contain LLM essays.
train_df, dev_df = train_test_split(
    final_df, test_size=0.2, random_state=42, stratify=final_df['generated']
)

# Build a vocabulary
# min_df is a document-frequency threshold: drop words that appear in fewer than
# five training essays.
vectorizer = CountVectorizer(min_df=5)
X_train = vectorizer.fit_transform(train_df['text'])
X_dev = vectorizer.transform(dev_df['text'])  # reuse the training vocabulary, never refit
vocabulary = vectorizer.get_feature_names_out()

# Reverse index
reverse_index = {word: idx for idx, word in enumerate(vocabulary)}

# Calculate probabilities
total_documents = len(train_df)
class_counts = train_df['generated'].value_counts()

# Occurrence probabilities are per document, so work on presence/absence rather than
# raw token counts (summing counts and dividing by the number of documents can give
# values above 1 for frequent words).
word_present = X_train > 0
# Positional boolean mask: the sparse matrix rows follow train_df row order, not its index labels.
is_llm = train_df['generated'].to_numpy() == 1

# P[word]
word_occurrence_prob = np.asarray(word_present.sum(axis=0)).ravel() / total_documents

# P[word | LLM]
llm_documents = train_df[is_llm]
llm_word_occurrence_prob = np.asarray(word_present[is_llm].sum(axis=0)).ravel() / class_counts.loc[1]

# Experiments with smoothing
# Keep the model that scores best on the development set instead of whichever alpha
# happens to be last in the list. `model2`, `predictions` and `accuracy` always refer
# to that selected model after the loop.
# NOTE: the classes are extremely imbalanced, so always predicting 0 already yields a
# very high accuracy; read these numbers with that baseline in mind.
smoothing_values = [1.0, 0.5, 0.1, 0.01]
model2, predictions, accuracy = None, None, -1.0
for alpha in smoothing_values:
    candidate = MultinomialNB(alpha=alpha)
    candidate.fit(X_train, train_df['generated'])
    candidate_predictions = candidate.predict(X_dev)
    candidate_accuracy = accuracy_score(dev_df['generated'], candidate_predictions)
    print(f'Accuracy with smoothing alpha={alpha}: {candidate_accuracy}')
    # Strict '>' keeps the earliest alpha in the list when several alphas tie.
    if candidate_accuracy > accuracy:
        model2, predictions, accuracy = candidate, candidate_predictions, candidate_accuracy

# Derive Top 10 words that predict each class
# Ranking by feature_log_prob_ alone returns the most frequent words of each class
# (largely the same common words for both). The log-odds
# log P[word | LLM] - log P[word | human] instead ranks words by how strongly they
# separate the classes: large positive -> LLM, large negative -> human.
log_odds = model2.feature_log_prob_[1] - model2.feature_log_prob_[0]
ranked = log_odds.argsort()  # ascending: most human-indicative first

top_words_llm = [vocabulary[idx] for idx in ranked[::-1][:10]]
top_words_human = [vocabulary[idx] for idx in ranked[:10]]

Accuracy with smoothing alpha=1.0: 0.9964028776978417
Accuracy with smoothing alpha=0.5: 0.9964028776978417
Accuracy with smoothing alpha=0.1: 0.9964028776978417
Accuracy with smoothing alpha=0.01: 0.9964028776978417


In [54]:

# Predict a class label for every essay in the test file.
# The vectorizer is only applied (transform, not fit_transform) so the test essays
# are encoded with the vocabulary learned from the training split.
test_df = pd.read_csv("test_essays.csv")
X_test = vectorizer.transform(test_df["text"])
predictions_test = model2.predict(X_test)


In [55]:
# Predicted label per test essay (0 = written by a student, 1 = generated by an LLM).
predictions_test

array([0, 0, 0], dtype=int64)