In [5]:
import pandas as pd

# Load the Stack Overflow Q&A dataset.
# NOTE(review): the backslash separator is kept on purpose so this path stays
# identical to the one used by the cell that writes the sample file; it only
# resolves into the "task" folder on Windows.
df = pd.read_csv('task\\stackoverflow_qa.csv')

# Parse 'creationdate' so the date filters below compare timestamps, not strings
df['creationdate'] = pd.to_datetime(df['creationdate'])

# Every report below is printed with sep="\n": the default separator (a space)
# would be inserted in front of the frame and shift its header row by one column.

# 1. Questions created before 2014
before_2014 = df[df['creationdate'] < '2014-01-01']
print("Questions before 2014:", before_2014, sep="\n")

# 2. Questions with score > 50
score_above_50 = df[df['score'] > 50]
print("\nQuestions with score > 50:", score_above_50, sep="\n")

# 3. Questions with score between 50 and 100 (both ends inclusive)
score_50_100 = df[(df['score'] >= 50) & (df['score'] <= 100)]
print("\nQuestions with score between 50 and 100:", score_50_100, sep="\n")

# 4. Questions answered by Scott Boston
answered_by_scott = df[df['ans_name'] == 'Scott Boston']
print("\nQuestions answered by Scott Boston:", answered_by_scott, sep="\n")

# 5. Questions answered by any of 5 selected users
users = ['unutbu', 'Scott Boston', 'jezrael', 'DSM', 'Warren Weckesser']
answered_by_five = df[df['ans_name'].isin(users)]
print("\nQuestions answered by selected 5 users:", answered_by_five, sep="\n")

# 6. Questions created between Mar and Oct 2014, answered by unutbu, score < 5
# The upper bound is half-open (< 1 Nov): comparing with '<= 2014-10-31' means
# '<= 2014-10-31 00:00:00' and would drop questions posted later on 31 October.
filtered_questions = df[
    (df['creationdate'] >= '2014-03-01') &
    (df['creationdate'] < '2014-11-01') &
    (df['ans_name'] == 'unutbu') &
    (df['score'] < 5)
]
print("\nFiltered questions (Mar-Oct 2014, by unutbu, score < 5):", filtered_questions, sep="\n")

# 7. Questions with score between 5–10 (inclusive) OR viewcount > 10,000
score_or_views = df[(df['score'].between(5, 10)) | (df['viewcount'] > 10000)]
print("\nQuestions with score 5–10 OR viewcount > 10000:", score_or_views, sep="\n")

# 8. Questions NOT answered by Scott Boston
not_by_scott = df[df['ans_name'] != 'Scott Boston']
print("\nQuestions not answered by Scott Boston:", not_by_scott, sep="\n")


Questions before 2014:
    id creationdate  score  viewcount         title  answercount  commentcount  \
0   1   2013-12-31     60       5000        Old Q1            1             0   
5   6   2011-08-03     51       8700       Classic            2             1   
6   7   2012-11-25      9      45000  Popular View            1             2   

   favoritecount quest_name  quest_rep      ans_name  ans_rep  
0              0      alice       1200        unutbu   100000  
5              0      frank        200  Scott Boston    51000  
6              2      grace        850        unutbu   100000  

Questions with score > 50:
    id creationdate  score  viewcount       title  answercount  commentcount  \
0   1   2013-12-31     60       5000      Old Q1            1             0   
3   4   2015-01-10    120       7000  High Score            1             0   
5   6   2011-08-03     51       8700     Classic            2             1   

   favoritecount quest_name  quest_rep          a

In [4]:
import pandas as pd
import os
from datetime import datetime

# Path to the CSV
filepath = "task\\stackoverflow_qa.csv"

# Make sure the "task" folder is present (no error when it already exists)
os.makedirs("task", exist_ok=True)

if os.path.exists(filepath):
    # Never overwrite an existing file
    print("📁 File already exists. Skipping creation.")
else:
    # Build a small sample dataset, one tuple per question
    columns = [
        'id', 'creationdate', 'score', 'viewcount', 'title', 'answercount',
        'commentcount', 'favoritecount', 'quest_name', 'quest_rep',
        'ans_name', 'ans_rep',
    ]
    rows = [
        (1, '2013-12-31', 60, 5000, 'Old Q1', 1, 0, 0, 'alice', 1200, 'unutbu', 100000),
        (2, '2014-03-15', 3, 15000, 'Unutbu Example', 2, 1, 1, 'bob', 900, 'Scott Boston', 50000),
        (3, '2014-07-20', 7, 9500, 'Score 7', 3, 2, 2, 'carol', 1500, 'DSM', 45000),
        (4, '2015-01-10', 120, 7000, 'High Score', 1, 0, 5, 'dan', 3000, 'Warren Weckesser', 38000),
        (5, '2016-05-12', 15, 13000, 'Another Q', 2, 3, 3, 'eve', 500, 'jezrael', 42000),
        (6, '2011-08-03', 51, 8700, 'Classic', 2, 1, 0, 'frank', 200, 'Scott Boston', 51000),
        (7, '2012-11-25', 9, 45000, 'Popular View', 1, 2, 2, 'grace', 850, 'unutbu', 100000),
    ]

    df_sample = pd.DataFrame(rows, columns=columns)
    df_sample.to_csv(filepath, index=False)
    print("✅ Sample stackoverflow_qa.csv file created.")


✅ Sample stackoverflow_qa.csv file created.


In [3]:
# Load the Titanic dataset.
# The forward-slash path is the same one the export cell writes to
# ("task/titanic.csv"); a backslash path only resolves on Windows.
titanic_df = pd.read_csv("task/titanic.csv")

# Every report below is printed with sep="\n": the default separator (a space)
# would be inserted in front of the frame and shift its header row by one column.

# 1. Female Passengers in Class 1 and Age between 20 and 30 (inclusive)
female_class1_20_30 = titanic_df[
    (titanic_df['Sex'] == 'female') &
    (titanic_df['Pclass'] == 1) &
    (titanic_df['Age'].between(20, 30))
]
print("Female Class 1, Age 20–30:", female_class1_20_30, sep="\n")

# 2. Passengers who paid > $100
fare_above_100 = titanic_df[titanic_df['Fare'] > 100]
print("\nFare > $100:", fare_above_100, sep="\n")

# 3. Survived and Were Alone (SibSp=0 and Parch=0)
survived_alone = titanic_df[
    (titanic_df['Survived'] == 1) &
    (titanic_df['SibSp'] == 0) &
    (titanic_df['Parch'] == 0)
]
print("\nSurvived and alone:", survived_alone, sep="\n")

# 4. Embarked from 'C' and Fare > $50
embarked_c_fare_50 = titanic_df[
    (titanic_df['Embarked'] == 'C') &
    (titanic_df['Fare'] > 50)
]
print("\nEmbarked C & Fare > $50:", embarked_c_fare_50, sep="\n")

# 5. With siblings/spouses AND parents/children aboard
with_family = titanic_df[
    (titanic_df['SibSp'] > 0) &
    (titanic_df['Parch'] > 0)
]
print("\nPassengers with SibSp > 0 and Parch > 0:", with_family, sep="\n")

# 6. Age <= 15 and didn't survive (rows with a missing Age never match)
kids_not_survived = titanic_df[
    (titanic_df['Age'] <= 15) &
    (titanic_df['Survived'] == 0)
]
print("\nChildren (≤15) who did not survive:", kids_not_survived, sep="\n")

# 7. Have Cabin and Fare > $200
cabin_and_high_fare = titanic_df[
    (titanic_df['Cabin'].notna()) &
    (titanic_df['Fare'] > 200)
]
print("\nCabin present and Fare > $200:", cabin_and_high_fare, sep="\n")

# 8. Odd-numbered Passenger IDs
odd_passenger_ids = titanic_df[titanic_df['PassengerId'] % 2 == 1]
print("\nPassengers with odd PassengerId:", odd_passenger_ids, sep="\n")

# 9. Unique ticket numbers
# duplicated(keep=False) flags every row whose ticket occurs more than once,
# so its negation keeps only tickets that appear exactly once.
unique_tickets = titanic_df[~titanic_df['Ticket'].duplicated(keep=False)]
print("\nPassengers with unique ticket numbers:", unique_tickets, sep="\n")

# 10. 'Miss' in name and Class 1
# na=False treats a missing Name as "no match"; without it the mask would
# contain NaN and could not be used for boolean indexing.
miss_class1 = titanic_df[
    (titanic_df['Name'].str.contains('Miss', na=False)) &
    (titanic_df['Pclass'] == 1)
]
print("\n'Miss' in name and Class 1:", miss_class1, sep="\n")


Female Class 1, Age 20–30:
 Empty DataFrame
Columns: [PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked]
Index: []

Fare > $100:
 Empty DataFrame
Columns: [PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked]
Index: []

Survived and alone:
    PassengerId  Survived  Pclass                    Name     Sex  Age  SibSp  \
2            3         1       3  Heikkinen, Miss. Laina  female   26      0   

   Parch            Ticket   Fare Cabin Embarked  
2      0  STON/O2. 3101282  7.925   NaN        S  

Embarked C & Fare > $50:
    PassengerId  Survived  Pclass  \
1            2         1       1   

                                               Name     Sex  Age  SibSp  \
1  Cumings, Mrs. John Bradley (Florence Briggs Th.)  female   38      1   

   Parch    Ticket     Fare Cabin Embarked  
1      0  PC 17599  71.2833   C85        C  

Passengers with SibSp > 0 and Parch > 0:
 Empty DataFrame
Columns: [Passen

In [1]:
import pandas as pd

# Sample Titanic data: five passengers, one list per column.
# The longer text columns are spelled out first to keep the mapping below compact.
passenger_names = [
    'Braund, Mr. Owen Harris',
    'Cumings, Mrs. John Bradley (Florence Briggs Th.)',
    'Heikkinen, Miss. Laina',
    'Futrelle, Mrs. Jacques Heath (Lily May Peel)',
    'Allen, Mr. William Henry',
]
ticket_numbers = ['A/5 21171', 'PC 17599', 'STON/O2. 3101282', '113803', '373450']

data = dict(
    PassengerId=[1, 2, 3, 4, 5],
    Survived=[0, 1, 1, 1, 0],
    Pclass=[3, 1, 3, 1, 3],
    Name=passenger_names,
    Sex=['male', 'female', 'female', 'female', 'male'],
    Age=[22, 38, 26, 35, 35],
    SibSp=[1, 1, 0, 1, 0],
    Parch=[0, 0, 0, 0, 0],
    Ticket=ticket_numbers,
    Fare=[7.25, 71.2833, 7.925, 53.1, 8.05],
    Cabin=[None, 'C85', None, 'C123', None],  # None marks a passenger without a cabin
    Embarked=['S', 'C', 'S', 'S', 'S'],
)

# Column order in the frame follows the order of the keys above
titanic_df = pd.DataFrame(data)


In [2]:
import os

# Destination folder and file for the exported Titanic sample
output_dir = "task"
csv_path = "task/titanic.csv"

# Create the folder when it is missing (no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Write the frame without its index column
titanic_df.to_csv(csv_path, index=False)

print("✅ Titanic CSV file created at: task/titanic.csv")


✅ Titanic CSV file created at: task/titanic.csv
