<a href="https://colab.research.google.com/github/AaronSam-30052003/supply_chain_disruption/blob/main/supply_chain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = 'https://www.imf.org/en/publications/weo'

response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

articles = []

count = 0
for article in soup.find_all('article'):
    if count >= 50:
        break

    title = article.find('h2')
    description = article.find('p')
    link = article.find('a', href=True)
    source = article.find('span', class_='source')
    published = article.find('time')

    if title and description and link:
        articles.append({
            'Title': title.get_text(),
            'Description': description.get_text(),
            'URL': link['href'],
            'Source': source.get_text() if source else 'Unknown',
            'Published At': published.get_text() if published else 'Unknown'
        })
        count += 1

news_df = pd.DataFrame(articles)

print(news_df.head())

csv_file_path = "supply_chain_impact_news.csv"
try:
    news_df.to_csv(csv_file_path, index=False)
    print(f"Data saved to {csv_file_path}")
except IOError as e:
    print(f"Error saving data to CSV: {e}")


                                 Title  \
0  World Economic Outlook - All Issues   

                                         Description                   URL  \
0  The World Economic Outlook (WEO) is a survey o...  /ar/Publications/WEO   

    Source Published At  
0  Unknown      Unknown  
Data saved to supply_chain_impact_news.csv


In [13]:
import requests
import json
import time
import pandas as pd

API_KEY_MEDIASTACK = '939de3f5d7bcd9030de41c23754ec20b'
BASE_URL_MEDIASTACK = 'https://api.mediastack.com/v1/news'
API_KEY_GOOGLE = "AIzaSyBRu1jSldkSrJbWXcHPOWfy7s1K_TlvV54"
CSE_ID = "d03a2f288016547d9"
API_KEY_NEWSAPI = "271b4406b0f24a899c09bb77c3723780"


def fetch_media_stack_articles():
    params = {
        'access_key': API_KEY_MEDIASTACK,
        'keywords': 'transportation',
        'category': 'business'
    }
    response = requests.get(BASE_URL_MEDIASTACK, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None


def filter_mediastack_articles(data):
    if data and 'data' in data:
        filtered_articles = [
            {
                'source': article.get('source'),
                'publisher': article.get('author'),
                'title': article.get('title'),
                'content': article.get('description'),
                'language': article.get('language'),
                'date': article.get('published_at')
            }
            for article in data['data']
        ]
        return filtered_articles
    else:
        print("No articles retrieved.")
        return []


def search_engine_news(query, num_results=100):
    url = "https://www.googleapis.com/customsearch/v1"
    results = []
    start_index = 1
    while len(results) < num_results:
        params = {
            "key": API_KEY_GOOGLE,
            "cx": CSE_ID,
            "q": query,
            "num": 10,
            "start": start_index,
            "sort": "date"
        }
        response = requests.get(url, params=params)
        if response.status_code == 200:
            data = response.json()
            items = data.get("items", [])
            if not items:
                break
            results.extend(items)
            start_index += 10
            time.sleep(1)
        else:
            print(f"Error: {response.status_code}, {response.text}")
            break
    return results[:num_results]


def filter_search_engine_news(results):
    filtered_results = []
    for item in results:
        title = item.get("title", "No title")
        link = item.get("link", "No link")
        snippet = item.get("snippet", "No content")
        publisher = item.get("displayLink", "Unknown publisher")
        filtered_results.append({
            "title": title,
            "url": link,
            "content": snippet,
            "publisher": publisher,
            "source": publisher,
            "date": "Unknown date"
        })
    return filtered_results


def fetch_newsapi_articles(api_key, query, page_size=100, max_results=100):
    url = "https://newsapi.org/v2/everything"
    all_articles = []
    page = 1
    while len(all_articles) < max_results:
        params = {
            "q": query,
            "apiKey": api_key
        }
        response = requests.get(url, params=params)
        if response.status_code == 200:
            data = response.json()
            articles = data.get('articles', [])
            if not articles:
                break
            all_articles.extend(articles)
            page += 1
        else:
            print(f"Error: {response.status_code}")
            break
    return all_articles


media_stack_data = fetch_media_stack_articles()
media_stack = filter_mediastack_articles(media_stack_data)
if media_stack:
    df_media_stack = pd.DataFrame(media_stack)
    df_media_stack.to_csv("media_stack_filtered_supply_chain_articles.csv", index=False)

query = "shipment delay"
results = search_engine_news(query)
filtered_results = filter_search_engine_news(results)
if filtered_results:
    df_google = pd.DataFrame(filtered_results)
    df_google.to_csv("custom_search_engine_news.csv", index=False)

query = "disruption"
all_articles = fetch_newsapi_articles(API_KEY_NEWSAPI, query)
if all_articles:
    df_newsapi = pd.DataFrame(all_articles)
    df_newsapi.drop(columns=["urlToImage", "source"], inplace=True, errors='ignore')
    df_newsapi.rename(columns={"publishedAt": "source"}, inplace=True)
    df_newsapi.to_csv("newsapi_articles.csv", index=False)


In [12]:
import requests
import pandas as pd
import datetime

API_KEY = '3404823e287ae20e445a35512bf9fff4'

def fetch_weather_data(city, days=5):
    url = f"http://api.openweathermap.org/data/2.5/onecall/timemachine"
    latitude, longitude = get_city_coordinates(city)
    if not latitude or not longitude:
        print("City coordinates not found!")
        return

    weather_data = []
    current_time = int(datetime.datetime.now().timestamp())

    for day in range(days):
        timestamp = current_time - (day * 86400)
        params = {
            "lat": latitude,
            "lon": longitude,
            "dt": timestamp,
            "appid": API_KEY,
            "units": "metric"
        }
        response = requests.get(url, params=params)
        if response.status_code == 200:
            data = response.json()
            weather_data.append({
                "date": datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d'),
                "temperature": data['current']['temp'],
                "humidity": data['current']['humidity'],
                "weather": data['current']['weather'][0]['description']
            })
        else:
            print(f"Error fetching data for {day} days ago: {response.status_code}")
            break

    return weather_data

def get_city_coordinates(city):
    url = f"http://api.openweathermap.org/data/2.5/weather"
    params = {
        "q": city,
        "appid": API_KEY
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        return data['coord']['lat'], data['coord']['lon']
    else:
        print(f"Error fetching city coordinates: {response.status_code}")
        return None, None

def save_to_csv(data, filename="weather_data.csv"):
    if not data:
        print("No data to save!")
        return
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"Weather data saved to {filename}")

city_name = "Chennai"
weather_data = fetch_weather_data(city_name)

if weather_data:
    save_to_csv(weather_data)


Error fetching city coordinates: 401
City coordinates not found!
