In [5]:
pip install python-docx

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[K     |████████████████████████████████| 244 kB 4.2 MB/s eta 0:00:01
Installing collected packages: python-docx
Successfully installed python-docx-1.1.2
Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [12]:
import os
from pathlib import Path

from docx import Document

# Start from an empty document; every section below is appended to it in order.
doc = Document()

# Title followed by a one-line tagline.
doc.add_heading("📈 Reddit Sentiment Analysis with Pipedream & Streamlit", level=1)
tagline = "🚀 Automated Reddit sentiment analysis using Pipedream for data collection, Google Sheets/PostgreSQL for storage, and Streamlit for interactive visualization."
doc.add_paragraph(tagline)

# Section: project overview (two sentences joined into a single paragraph).
overview_sentences = [
    "This project tracks sentiment on Reddit discussions in real-time using Pipedream automation.",
    "The data is processed using NLP sentiment analysis, stored in Google Sheets/PostgreSQL, and visualized in a Streamlit dashboard.",
]
doc.add_heading("🛠️ Project Overview", level=2)
doc.add_paragraph(" ".join(overview_sentences))

# Section: use cases (one paragraph, one item per line).
use_cases = [
    "✅ Track public sentiment on AI, tech trends, crypto, or politics",
    "✅ Monitor brand reputation based on subreddit discussions",
    "✅ Automate data collection for continuous analysis",
]
doc.add_heading("💡 Use Cases", level=2)
doc.add_paragraph("\n".join(use_cases))

doc.add_heading("📈 Architecture & Tech Stack", level=2)

# (component, technology) pairs; each pair becomes one row of the table below.
tech_stack = [
    ("Data Collection", "🛠️ Pipedream (Reddit API)"),
    ("Sentiment Analysis", "🧠 TextBlob, Vader (NLP)"),
    ("Storage", "📊 Google Sheets / PostgreSQL"),
    ("Visualization", "📈 Streamlit, Plotly"),
    ("Deployment", "🌐 Streamlit Cloud / Hugging Face Spaces"),
]

# Two-column table that starts with only the header row.
tech_stack_table = doc.add_table(rows=1, cols=2)
component_header, technology_header = tech_stack_table.rows[0].cells
component_header.text = "Component"
technology_header.text = "Technology Used"

# Append one data row per entry.
for component, technology in tech_stack:
    component_cell, technology_cell = tech_stack_table.add_row().cells
    component_cell.text = component
    technology_cell.text = technology

# Section: features (one paragraph, one item per line).
features = [
    "✅ Serverless automation with Pipedream",
    "✅ Reddit API Integration (PRAW, REST API)",
    "✅ Sentiment analysis using NLP (TextBlob, Vader)",
    "✅ Data storage in Google Sheets/PostgreSQL",
    "✅ Interactive Streamlit Dashboard",
]
doc.add_heading("🛠️ Features", level=2)
doc.add_paragraph("\n".join(features))

def _add_code_block(document, code):
    """Append ``code`` to ``document`` as one monospaced paragraph.

    Newlines at the very start and end of ``code`` are removed so that a
    triple-quoted snippet does not open or close with an empty line in the
    document. Line breaks and indentation inside the snippet are left as-is.

    Args:
        document: The python-docx document to append to.
        code: Source text of the snippet.

    Returns:
        The paragraph that was added.
    """
    paragraph = document.add_paragraph()
    run = paragraph.add_run(code.strip("\n"))
    # Fixed-width font so the snippet's indentation lines up.
    run.font.name = "Courier New"
    return paragraph


doc.add_heading('🔄 Workflow Breakdown', level=2)

doc.add_heading('1️⃣ Automating Data Collection with Pipedream', level=3)
_add_code_block(doc, '''
import { axios } from "@pipedream/platform";

export default defineComponent({
  async run({ steps }) {
    const redditUrl = "https://www.reddit.com/r/technology/hot.json?limit=50";
    const response = await axios(this, { method: "GET", url: redditUrl });

    return response.data.data.children.map(post => ({
      title: post.data.title,
      upvotes: post.data.ups
    }));
  }
});
''')

doc.add_heading('2️⃣ Storing Data in Google Sheets / PostgreSQL', level=3)
# Use real heading and list styles here: Markdown markers such as "####" and
# "- " are not interpreted in a .docx and would show up as literal characters.
doc.add_heading("Option 1: Google Sheets", level=4)
doc.add_paragraph(
    "Pipedream sends the processed Reddit data to Google Sheets using the Google Sheets API.",
    style="List Bullet",
)
doc.add_heading("Option 2: PostgreSQL Database", level=4)
_add_code_block(doc, '''
CREATE TABLE reddit_sentiment (
    id SERIAL PRIMARY KEY,
    post_title TEXT,
    sentiment_score FLOAT,
    upvotes INT,
    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
''')

doc.add_heading('3️⃣ Visualization with Streamlit', level=3)
_add_code_block(doc, '''
import streamlit as st
import pandas as pd
import plotly.express as px

st.title("📊 Reddit Sentiment Analysis")

# Load data
df = pd.read_csv("reddit_sentiment.csv")

# Sentiment Distribution Plot
fig = px.histogram(df, x="Sentiment", nbins=20, title="Sentiment Analysis")
st.plotly_chart(fig)
''')

doc.add_heading("👨‍💻 How to Run This Project", level=2)

# (heading, body) pairs; multi-line bodies are assembled one line per entry.
steps = [
    (
        "1️⃣ Clone the Repository",
        "\n".join([
            "git clone https://github.com/yourusername/reddit-sentiment-analysis.git",
            "cd reddit-sentiment-analysis",
        ]),
    ),
    (
        "2️⃣ Install Dependencies",
        "pip install praw textblob pandas plotly streamlit",
    ),
    (
        "3️⃣ Set Up Environment Variables",
        "\n".join([
            "Create a `.env` file and add:",
            "CLIENT_ID=your_reddit_client_id",
            "CLIENT_SECRET=your_reddit_client_secret",
            "USER_AGENT=MyRedditSentimentApp",
        ]),
    ),
    (
        "4️⃣ Run Data Collection Script",
        "python fetch_reddit_data.py",
    ),
    (
        "5️⃣ Run Streamlit Dashboard",
        "streamlit run app.py",
    ),
]

# Each step is written as a level-3 heading followed by its body paragraph.
for step_title, step_body in steps:
    doc.add_heading(step_title, level=3)
    doc.add_paragraph(step_body)

# Section: results (intro line followed by dash-prefixed findings).
insight_lines = [
    "Sample Insights from Reddit Analysis:",
    "- AI-related posts received highly positive sentiment.",
    "- Crypto-related discussions were more polarized.",
    "- Negative sentiment spikes corresponded with controversial news.",
]
doc.add_heading("📈 Results & Insights", level=2)
doc.add_paragraph("\n".join(insight_lines))

# Section: planned improvements.
improvement_lines = [
    "✅ Train a custom ML model for more accurate sentiment classification",
    "✅ Expand to multiple subreddits to track cross-community sentiment",
    "✅ Use Named Entity Recognition (NER) to extract key topics",
    "✅ Deploy Streamlit app publicly on Streamlit Cloud / Hugging Face",
]
doc.add_heading("💪 Future Improvements", level=2)
doc.add_paragraph("\n".join(improvement_lines))

# Section: how to contribute.
contribution_lines = [
    "👥 Contributions are welcome! If you’d like to enhance this project:",
    "1. Fork the repo",
    "2. Create a new feature branch",
    "3. Submit a PR with improvements",
]
doc.add_heading("📚 Contributions", level=2)
doc.add_paragraph("\n".join(contribution_lines))

# Section: license.
doc.add_heading("💎 License", level=2)
doc.add_paragraph("📚 MIT License - Feel free to use and modify this project.")

doc.add_heading('📱 Connect With Me', level=2)

# (label, display text, URL) for each contact line. Markdown link syntax
# "[text](url)" is not interpreted in a .docx and would appear literally, so
# each entry is written as plain "text (url)" instead.
contact_links = [
    ("📺 GitHub", "@yourgithub", "https://github.com/yourusername"),
    ("👤 LinkedIn", "Your Name", "https://linkedin.com/in/yourname"),
    ("🌐 Portfolio", "yourwebsite.com", "https://yourwebsite.com"),
]
doc.add_paragraph(
    "\n".join(f"{label}: {text} ({url})" for label, text, url in contact_links)
)

# Save the document.
# The destination defaults to the original location. Set the README_DOCX_PATH
# environment variable to write somewhere else, e.g. on a machine that does
# not have this directory layout.
output_path = Path(
    os.environ.get(
        "README_DOCX_PATH",
        "/Users/bolade/env/sentiment_analysis_projects/reddit_sentiment_analysis/Reddit_Sentiment_Readme.docx",
    )
)
# Make sure the target folder exists before writing the file.
output_path.parent.mkdir(parents=True, exist_ok=True)

doc_path = str(output_path)
doc.save(doc_path)

# Echo the path the file was written to as the cell's output.
doc_path


'/Users/bolade/env/sentiment_analysis_projects/reddit_sentiment_analysis/Reddit_Sentiment_Readme.docx'