<a href="https://colab.research.google.com/github/AnshuKamath/DB-Analytics-Assignment/blob/main/Section1_R_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Section 1: Data Analysis in R - Analyzing "Renewing Innovation:
# GreenFuture's Transformation through Technology"

In [None]:
# Install the r-base package on the notebook VM with apt-get
# (-y answers the confirmation prompts automatically)
!apt-get install -y r-base

In [None]:
# Install the required R packages (RSQLite, DBI, dplyr, ggplot2) from the
# CRAN mirror given in `repos`, using a one-off non-interactive `R -e` call
!R -e "install.packages(c('RSQLite', 'DBI', 'dplyr', 'ggplot2'), repos='https://cran.rstudio.com/')"

In [None]:
# Load the rpy2 IPython extension (the R bridge for a Python kernel)
# NOTE(review): the R cells further down carry no %%R prefix, so they look
# like they were run on an R kernel - confirm which runtime this notebook targets
%load_ext rpy2.ipython

In [None]:
# Clone the assignment repository into the current working directory
# NOTE(review): the import cell below reads the CSV straight from the raw
# GitHub URL, so this local clone looks unused by the visible cells - confirm
!git clone https://github.com/AnshuKamath/DB-Analytics-Assignment.git

In [3]:
# Raw-content GitHub URL of the GreenFuture ideas dataset (CSV).
# This cell only stores the address; nothing is downloaded here.
github_url <- paste0(
  "https://raw.githubusercontent.com/AnshuKamath/DB-Analytics-Assignment/main/",
  "greenfuture_ideas_dataset.csv"
)

In [6]:
# Download the dataset from GitHub into a data frame.
# The address comes from github_url, assigned in the previous cell, so the
# dataset location is defined in exactly one place instead of being repeated.
greenfuture_data <- read.csv(github_url)

In [None]:
# Display the first few rows to confirm successful import
head(greenfuture_data)
# Show the structure: one line per column with its type and leading values
str(greenfuture_data)
# Summarise every column
summary(greenfuture_data)

In [13]:
# Part 2A: Demonstrating SQL Operations in R (Fixed Column Names)
# This script demonstrates basic SQL operations in R

# Load required libraries
library(RSQLite)
library(DBI)
library(dplyr)

# Create a connection to a new in-memory SQLite database and load the data.
# Fail fast with a clear message if the Part 1 import cell has not been run,
# instead of surfacing an "object not found" error from inside dbWriteTable().
if (!exists("greenfuture_data") || !is.data.frame(greenfuture_data)) {
  stop(
    "greenfuture_data is not loaded; run the Part 1 import cell first",
    call. = FALSE
  )
}

# Re-running this cell would otherwise orphan the previously opened
# connection, so close it before opening a fresh one.
if (exists("conn") && inherits(conn, "DBIConnection") && dbIsValid(conn)) {
  dbDisconnect(conn)
}
conn <- dbConnect(RSQLite::SQLite(), ":memory:")

# Copy the data frame into a table named "ideas"
dbWriteTable(conn, "ideas", greenfuture_data)

# Confirm that the table was created successfully
dbListTables(conn)

# Read the column names back from SQLite. They contain dots
# (e.g. Employee.ID), which is why the queries below double-quote them.
column_names <- dbGetQuery(conn, "PRAGMA table_info(ideas)")
print("Column names in the database:")
print(column_names$name)

# 1. Basic retrieval: read the first five rows of the ideas table
select_query <- "SELECT * FROM ideas LIMIT 5"
result1 <- DBI::dbGetQuery(conn = conn, statement = select_query)
print("Basic SELECT operation:")
print(result1)

# 2. Filtered retrieval: the ten most-voted ideas with more than 50 votes.
# The R string is single-quoted so the dotted column names can be
# double-quoted for SQLite without backslash escapes.
high_votes_query <- 'SELECT "Employee.ID", "Idea.Title", "Number.of.Votes"
                   FROM ideas
                   WHERE "Number.of.Votes" > 50
                   ORDER BY "Number.of.Votes" DESC
                   LIMIT 10'
result2 <- DBI::dbGetQuery(conn = conn, statement = high_votes_query)
print("SELECT with filtering - High votes:")
print(result2)

# 3. SELECT with conditional filtering - Ideas from specific locations
# The sample rows printed by the basic SELECT show Office.Location holding
# country names (Germany, Canada, France, Japan, Australia), so a filter on
# city names would not match any of those rows; filter on countries instead.
# NOTE(review): only five rows are visible in the captured output - confirm
# the full set of Office.Location values before settling on this list.
location_query <- "SELECT * FROM ideas WHERE \"Office.Location\" IN ('Germany', 'Japan', 'Canada') LIMIT 5"
result3 <- dbGetQuery(conn, location_query)
print("SELECT with location filtering:")
print(result3)

# 4. INSERT Operation - Add a new idea record
# Insert only when no row with Employee.ID 'EMP3001' exists, so running this
# step twice does not create a duplicate record.
check_existing <- dbGetQuery(conn, "SELECT COUNT(*) as count FROM ideas WHERE \"Employee.ID\" = 'EMP3001'")
if (check_existing$count == 0) {
  # The imported rows store Idea.Submission.Date as dd/mm/yyyy text
  # (e.g. 16/04/2024), so the new record uses that format too; an ISO
  # yyyy-mm-dd value would leave two date formats in the same column.
  # NOTE(review): the other categorical values are placeholders for this
  # throw-away row and differ from the vocabularies seen in the sample rows
  # (e.g. countries in Office.Location) - align them if the row is ever kept.
  insert_query <- "INSERT INTO ideas
                  (\"Employee.ID\", \"Office.Location\", Department, \"Idea.Submission.Date\", \"Idea.Title\",
                   \"Idea.Category\", \"Number.of.Votes\", \"Collaboration.Status\", \"Approval.Status\",
                   \"Implementation.Status\", \"Security.Concern.Flag\")
                  VALUES
                  ('EMP3001', 'London', 'Research', '01/05/2023', 'AI-Driven Carbon Footprint Analyzer',
                   'Renewable Energy', 0, 'Not Started', 'Pending', 'Not Started', 'No')"
  dbExecute(conn, insert_query)
  print("New idea inserted successfully")
} else {
  print("Record with EMP3001 already exists, skipping insertion")
}

# Verify the insertion by reading the record back
verify_insert <- "SELECT * FROM ideas WHERE \"Employee.ID\" = 'EMP3001'"
result4 <- dbGetQuery(conn, verify_insert)
print("Verifying INSERT operation:")
print(result4)

# 5. UPDATE: give the EMP3001 record 15 votes and mark its collaboration
# status as 'In Progress'; dbExecute() reports how many rows were changed
update_query <- "UPDATE ideas
                SET \"Number.of.Votes\" = 15, \"Collaboration.Status\" = 'In Progress'
                WHERE \"Employee.ID\" = 'EMP3001'"
rows_affected <- DBI::dbExecute(conn = conn, statement = update_query)
print(paste0("Rows updated: ", rows_affected))

# Read the record back to confirm the new values were stored
verify_update <- "SELECT * FROM ideas WHERE \"Employee.ID\" = 'EMP3001'"
result5 <- DBI::dbGetQuery(conn = conn, statement = verify_update)
print("Verifying UPDATE operation:")
print(result5)

# 6. DELETE: remove the EMP3001 record again and report the affected row count
delete_query <- "DELETE FROM ideas WHERE \"Employee.ID\" = 'EMP3001'"
rows_deleted <- DBI::dbExecute(conn = conn, statement = delete_query)
print(paste0("Rows deleted: ", rows_deleted))

# Look the record up once more; any remaining row means the delete failed
verify_delete <- "SELECT * FROM ideas WHERE \"Employee.ID\" = 'EMP3001'"
result6 <- DBI::dbGetQuery(conn = conn, statement = verify_delete)
if (nrow(result6) > 0) {
  print("Record still exists - deletion failed")
} else {
  print("Record successfully deleted")
}

# Note: We're not disconnecting from the database here because we'll use
# the same connection in the next part

[1] "Column names in the database:"
 [1] "Employee.ID"           "Office.Location"       "Department"           
 [4] "Idea.Submission.Date"  "Idea.Title"            "Idea.Category"        
 [7] "Number.of.Votes"       "Collaboration.Status"  "Approval.Status"      
[10] "Implementation.Status" "Security.Concern.Flag"
[1] "Basic SELECT operation:"
  Employee.ID Office.Location           Department Idea.Submission.Date
1    587b45d8         Germany     Renewable Energy           16/04/2024
2    0d3e8b8e          Canada     Renewable Energy           14/01/2024
3    fa012fe7          France Environmental Policy           10/01/2025
4    cb8ed680           Japan           Technology           24/12/2023
5    333fff26       Australia           Technology           13/02/2025
  Idea.Title         Idea.Category Number.of.Votes Collaboration.Status
1     Idea 1 Sustainable Transport             231   Single Contributor
2     Idea 2      Water Management             477  Cross-Regional Team
3 