# Testing APIs

#### Config

In [60]:
import configparser

# Load configuration
config = configparser.RawConfigParser()
config.read('../config/config.ini')

# Twitter API credentials
API_KEY = config['TWITTER_API']['API_KEY']
API_SECRET = config['TWITTER_API']['API_SECRET']
ACCESS_TOKEN = config['TWITTER_API']['ACCESS_TOKEN']
ACCESS_SECRET = config['TWITTER_API']['ACCESS_SECRET']
BEARER_TOKEN = config['TWITTER_API']['BEARER_TOKEN']

# Azure Event Hub Kafka settings
KAFKA_BROKER = config['AZURE_EVENTHUB']['BROKER']
EVENT_HUB_NAME = config['AZURE_EVENTHUB']['EVENT_HUB_NAME']
KAFKA_SASL_USERNAME = config['AZURE_EVENTHUB']['SASL_USERNAME']
KAFKA_SASL_PASSWORD = config['AZURE_EVENTHUB']['SASL_PASSWORD']

# Reddit API Settings
REDDIT_CLIENT_ID = config['REDDIT_API']['CLIENT_ID']
REDDIT_CLIENT_SECRET = config['REDDIT_API']['CLIENT_SECRET']
REDDIT_USER_AGENT = config['REDDIT_API']['USER_AGENT']
REDDIT_USERNAME = config['REDDIT_API']['USERNAME']
REDDIT_PASSWORD = config['REDDIT_API']['PASSWORD']

print("Config loaded successfully!")

Config loaded successfully!


#### Testing Tweepy

In [61]:
import tweepy
import pandas as pd
import time

In [47]:
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
api = tweepy.API(auth, wait_on_rate_limit=True)

# Test authentication
try:
    api.verify_credentials()
    print("Authentication successful!")
except Exception as e:
    print(f"Error: {e}")


Authentication successful!


In [48]:
client = tweepy.Client(bearer_token=BEARER_TOKEN, consumer_key=API_KEY, consumer_secret=API_SECRET, access_token=ACCESS_TOKEN, access_token_secret=ACCESS_SECRET)
auth = tweepy.OAuth2BearerHandler(BEARER_TOKEN)
api = tweepy.API(auth)
# Search for recent tweets
query = "fashion -is:retweet lang:en"  # Keywords + filter for retweets
response = client.search_recent_tweets(query=query, max_results=10, tweet_fields=["created_at", "text"])

# Parse and store data
data = []
if response.data:
    for tweet in response.data:
        data.append({
            "id": tweet.id,
            "created_at": tweet.created_at,
            "text": tweet.text
        })

# Convert to DataFrame
df = pd.DataFrame(data)
print(df)


                    id                created_at  \
0  1869087265127125142 2024-12-17 18:28:24+00:00   
1  1869087235427172369 2024-12-17 18:28:17+00:00   
2  1869087207174328518 2024-12-17 18:28:10+00:00   
3  1869087182184689829 2024-12-17 18:28:04+00:00   
4  1869087157920600371 2024-12-17 18:27:59+00:00   
5  1869087150886867395 2024-12-17 18:27:57+00:00   
6  1869087145358712856 2024-12-17 18:27:56+00:00   
7  1869087143500648723 2024-12-17 18:27:55+00:00   
8  1869087122965381609 2024-12-17 18:27:50+00:00   
9  1869087119135895917 2024-12-17 18:27:49+00:00   

                                                text  
0  Checkout on this shop on Shopee! Fashion  http...  
1  @Spread_Star @voguebusiness @Khulood_Almani @S...  
2  I paid for express shipping, and Fashion Nova ...  
3  @Ashy_slashee Hmmm... 🤔\n\nIt's a tough one th...  
4  🌟HOT HOT earn more with our crypto pump! $BTC ...  
5  they're stronger than me because if i had the ...  
6  i keep a copy of K&amp;R C in russian a

In [49]:
df['text'][0]

'Checkout on this shop on Shopee! Fashion  https://t.co/XLTOcf7ygW: https://t.co/sygAl8rFQD'

#### Testing Reddit API

In [87]:
import praw
import pandas as pd
import re
# Reddit API credentials
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=REDDIT_USER_AGENT,
    redirect_uri="http://localhost",
)

In [71]:
# Subreddit to analyze
subreddit = reddit.subreddit("malefashionadvice")

# Fetch posts from 'hot', 'new', or 'top'
posts = []
for post in subreddit.hot(limit=100):  # Change limit as needed
    posts.append({
        "title": post.title,
        "score": post.score,
        "num_comments": post.num_comments,
        "created_utc": post.created_utc,
        "selftext": post.selftext
    })

# Convert to DataFrame
df = pd.DataFrame(posts)
df.head()

Unnamed: 0,title,score,num_comments,created_utc,selftext
0,WAYWT (What Are You Wearing This Week?) - 23 O...,6,23,1729688000.0,WAYWT = What Are You Wearing This Week (or a d...
1,➡️ Daily Simple Questions ⬅️- Style feedback a...,1,4,1734455000.0,Welcome to the Daily Questions thread for all ...
2,I've held my tongue as long as I can.,405,125,1734420000.0,Some of the questions that get asked here are ...
3,Does anyone know what this suit jacket style i...,2,0,1734460000.0,[https://imgur.com/a/9SDo8Jw](https://imgur.co...
4,Tom Ford Oud Wood - 1.7oz,2,0,1734460000.0,Tom Ford Oud Wood - 1.7oz for $110 from Costco...


In [77]:
df['selftext'][99]

'Looking to get my first proper (ie. not “smart”) watch for nicer outfits. Being both a Swiss design and rail travel aficionado, a Mondaine Swiss Rail clock watch seems perfect for my taste. Any opinions on them? Good quality for the price? Long lasting?'

#### Images in Reddit

In [93]:
def fetch_reddit_posts(subreddit_name, num_posts=20, num_comments=3):
    """
    Fetch posts from a subreddit, including images, galleries, subreddit section, and top comments.

    :param subreddit_name: Name of the subreddit to fetch data from.
    :param num_posts: Number of posts to fetch.
    :param num_comments: Number of top comments to fetch per post.
    :return: DataFrame with Reddit post details (title, content, images, gallery, URL, section, top comments).
    """
    subreddit = reddit.subreddit(subreddit_name)
    posts = []

    # Updated regex to include jpeg, jpg, png, gif, imgur links
    image_regex = r"(https?://(?:i\.)?imgur\.com/[a-zA-Z0-9]+(?:\.jpg|\.jpeg|\.png|\.gif)?|" \
                  r"https?://.*\.(jpg|jpeg|png|gif))"

    # Fetch posts from the 'hot' section
    for submission in subreddit.hot(limit=num_posts):
        post_images = []

        # Extract image URLs from the post URL
        if submission.url:
            if re.search(image_regex, submission.url):
                post_images.append(submission.url)

        # Extract image links from the post's selftext
        if submission.selftext:
            images_in_text = re.findall(image_regex, submission.selftext)
            post_images.extend([img[0] for img in images_in_text])

        # Extract gallery images if the post contains a gallery
        if hasattr(submission, "gallery_data") and submission.gallery_data:
            media_metadata = submission.media_metadata
            for item in submission.gallery_data["items"]:
                media_id = item["media_id"]
                if media_id in media_metadata:
                    img_url = media_metadata[media_id]["s"]["u"]
                    post_images.append(img_url)

        # Remove duplicates in image links
        post_images = list(set(post_images))

        # Fetch top comments
        top_comments = []
        submission.comment_sort = "best"
        submission.comments.replace_more(limit=0)  # Remove "more comments" placeholders
        for comment in submission.comments[:num_comments]:
            if comment.body:
                top_comments.append(comment.body.strip())

        # Store post details
        post_data = {
            "title": submission.title,
            "content": submission.selftext,
            "images": ", ".join(post_images) if post_images else None,
            "url": submission.url,
            "section": "hot",
            "top_comments": " | ".join(top_comments) if top_comments else None
        }
        posts.append(post_data)

    # Convert to a DataFrame
    df = pd.DataFrame(posts)
    return df


In [101]:
subreddit_name = "malefashionadvice"
num_posts = 20
num_comments = 3

# Call the updated function
reddit_df = fetch_reddit_posts(subreddit_name, num_posts, num_comments)
print(f"Fetched {len(reddit_df)} posts from r/{subreddit_name} (hot section).")

# Display the DataFrame
reddit_df.head()


  submission.comment_sort = "best"


Fetched 20 posts from r/malefashionadvice (hot section).


Unnamed: 0,title,content,images,url,section,top_comments
0,WAYWT (What Are You Wearing This Week?) - 23 O...,WAYWT = What Are You Wearing This Week (or a d...,https://imgur.com/upload,https://www.reddit.com/r/malefashionadvice/com...,hot,https://preview.redd.it/av4u4k27wl1e1.jpeg?wid...
1,➡️ Daily Simple Questions ⬅️- Style feedback a...,Welcome to the Daily Questions thread for all ...,,https://www.reddit.com/r/malefashionadvice/com...,hot,Was wondering which faceshape I have. I'd say ...
2,I've held my tongue as long as I can.,Some of the questions that get asked here are ...,,https://www.reddit.com/r/malefashionadvice/com...,hot,The ones that irk me always go something like:...
3,Does anyone know what this suit jacket style i...,[https://imgur.com/a/9SDo8Jw](https://imgur.co...,https://imgur.com/a,https://www.reddit.com/r/malefashionadvice/com...,hot,Worth noting that double breasted tailoring co...
4,Does a brown tweed suit and light pink pinstri...,I have a really nice dark brown tweed suit tha...,,https://www.reddit.com/r/malefashionadvice/com...,hot,"I think you should try them on, see if you lik..."


In [106]:
reddit_df['top_comments'][1]

"Was wondering which faceshape I have. I'd say oval, maybe diamond? Second opinions would be very helpful\n\n[https://imgur.com/a/rtBuIUe](https://imgur.com/a/rtBuIUe) | Anyone know what this specific type of jacket is called? It's a parka with a fur hood but what I particularly like about it is that it looks super lightweight/thin (basically not puffy and insulated like normal jackets/parkas) and good for casual wear.\n\nhttps://preview.redd.it/h4rxm74p7g7e1.jpeg?width=736&format=pjpg&auto=webp&s=9debc4abf2fb31c015a27ff613845289ef17c8b2 | Hey! I recently snagged up a vintage suit that I found nice. On paper it should’ve fit me rather well but I see now that I need to make some adjustments. \n\nI don’t mind a wider and boxier fit but I need to slim and shorten the jacket atleast.\n\nWhat recommendations would you have for this suit? \n\nhttps://imgur.com/a/6zrAXhw"

In [107]:
reddit_df.to_csv('reddit_data.csv', index=False)