# Python and pandas library exercises

In [1]:
# Imports
import pandas as pd 
import numpy as np


### Challenge No:1 **Binary Search**: 
- Given an array of integers which is sorted in ascending order, and an integer target, write a function to search target in nums. If target exists, then return its index. Otherwise, return -1.

In [None]:
class Solution:
    def search(self, nums: list[int], target: int) -> int:
        """
        Given an array of integers, finding target numbers index
        """

        lo = 0
        hi = len(nums)-1
        while lo <= hi:
            mid = (lo+hi)//2
            result = nums[mid]
            if result == target: 
                return mid
            elif result < target:
                lo = mid+1
            else:
                hi = mid-1
        return -1

### Challenge No:2 **Invalid Tweets**: 
- https://leetcode.com/problems/invalid-tweets/
- A method to find the IDs of the invalid tweets. The tweet is invalid if the number of characters used in the content of the tweet is strictly greater than 15.

In [None]:
def invalid_tweets(tweets: pd.DataFrame) -> pd.DataFrame:
    """
    Given a DataFrame of tweets find the IDs of the invalid tweets.
    """
    long_tweets = tweets[tweets["content"].str.len() > 15]
    return long_tweets[["tweet_id"]]

### Challenge No:3 **Recyclable and Low Fat Products**: 
- https://leetcode.com/problems/recyclable-and-low-fat-products/ 
- A method to find the ids of products that are both low fat and recyclable.


In [None]:
def find_products(products: pd.DataFrame) -> pd.DataFrame:
    """ Given Products DataFrame, filtered according to  being low fat and recyclable (Y)
    """

    rec_lf_products = products[(products["low_fats"] == "Y") & (products["recyclable"] == "Y")]

    return rec_lf_products[["product_id"]]

### Challenge No:4 **Big Countries**: 
- https://leetcode.com/problems/big-countries/
- A method to filter  data of countries

In [None]:
def big_countries(world: pd.DataFrame) -> pd.DataFrame:
    """Given a DataFrame, filtering according to size or population data
    """

    big_countries_data = world[(world["area"] >= 3000000) | (world["population"] >= 25000000)]

    return big_countries_data[["name", "population", "area"]]

### Challenge No:5 **Customers Who Never Ordered**: 
- https://leetcode.com/problems/customers-who-never-order/
- Given Two tables, Customers (with a primary key: Id) and Orders (with a foreign key: customerId), Write a method to find all customers who never order anything.

In [None]:
def find_customers(customers: pd.DataFrame, orders: pd.DataFrame) -> pd.DataFrame:

    unordered_customers = customers[~customers["id"].isin(orders["customerId"])]

    unordered_customers = unordered_customers[['name']].rename(columns={'name': 'Customers'})

    return unordered_customers

### Challenge No:6 **Article Views I**: 
- https://leetcode.com/problems/article-views-i/?envType=study-plan-v2&envId=30-days-of-pandas&lang=pythondata
- Write a method to find from a table of authors, articles, viewers and date (without a unique key) all the authors that viewed at least one of their own articles. Return the result table sorted by id in ascending order.

In [None]:
def article_views(views: pd.DataFrame) -> pd.DataFrame:

    self_viewer = views[views["author_id"]== views["viewer_id"]]
    self_viewer.rename(columns={"author_id":"id"}, inplace=True)
    self_viewer.drop_duplicates(subset='id',inplace = True)
    self_viewer.sort_values(by=["id"], ascending=True, inplace=True)
    
    return self_viewer[["id"]]

### Challenge No:7 **Calculate Special Bonus**: 
- https://leetcode.com/problems/calculate-special-bonus/?envType=study-plan-v2&envId=30-days-of-pandas&lang=pythondata
- Write a method to calculate the bonus of each employee from table (employee_id (primary key), name, salary ). The bonus of an employee is 100% of their salary if the ID of the employee is an odd number and the employee's name does not start with the character 'M'. The bonus of an employee is 0 otherwise. Return the result table ordered by employee_id.

In [3]:
data = [[2, 'Meir', 3000], [3, 'Michael', 3800], [7, 'Addilyn', 7400], [8, 'Juan', 6100], [9, 'Kannon', 7700]]
employees = pd.DataFrame(data, columns=['employee_id', 'name', 'salary']).astype({'employee_id':'int64', 'name':'object', 'salary':'int64'})

In [26]:
def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    bonus = employees.copy()
    bonus = employees[["employee_id", "salary" ]]    
    bonus.rename(columns={"salary":"bonus"}, inplace=True) 
    bonus["bonus"] = employees["salary"] * (employees["employee_id"] % 2 !=0) * (employees["name"].str[0] != "M")
    bonus = bonus.sort_values(by=["employee_id"], ascending=True)
    return bonus

calculate_special_bonus(employees)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bonus.rename(columns={"salary":"bonus"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bonus["bonus"] = employees["salary"] * (employees["employee_id"] % 2 !=0) * (employees["name"].str[0] != "M")


Unnamed: 0,employee_id,bonus
0,2,0
1,3,0
2,7,7400
3,8,0
4,9,7700


### Challenge No:8 **Fix Names in a Table**: 
- https://leetcode.com/problems/fix-names-in-a-table/?envType=study-plan-v2&envId=30-days-of-pandas&lang=pythondata
- Write a method to fix the names so that only the first character is uppercase and the rest are lowercase. Return the result table ordered by user_id.

In [15]:
def fix_names(users: pd.DataFrame) -> pd.DataFrame:
    users["name"] = (users["name"].str.lower().str.capitalize())
    corrected = users.sort_values(by="user_id")
    return corrected

In [16]:
data = [[1, 'aLice'], [2, 'bOB']]
users = pd.DataFrame(data, columns=['user_id', 'name']).astype({'user_id':'Int64', 'name':'object'})
fix_names(users)

Unnamed: 0,user_id,name
0,1,Alice
1,2,Bob


### Challenge No:9 **Find Users With Valid E-Mails**: 
- https://leetcode.com/problems/find-users-with-valid-e-mails/description/?envType=study-plan-v2&envId=30-days-of-pandas&lang=pythondata
- Write a solution to find the users who have valid emails.A valid e-mail has a prefix name and a domain where:

The prefix name is a string that may contain letters (upper or lower case), digits, underscore '_', period '.', and/or dash '-'. The prefix name must start with a letter.
The domain is '@leetcode.com'.
Return the result table in any order.

In [19]:
def valid_emails(users: pd.DataFrame) -> pd.DataFrame:
    
    pattern = r'^[a-zA-Z][a-zA-Z0-9_.-]*@leetcode\.com$'
  
    valid_users = users[users["mail"].str.contains(pattern)]
    
    return valid_users

In [20]:
data = [[1, 'Winston', 'winston@leetcode.com'], [2, 'Jonathan', 'jonathanisgreat'], [3, 'Annabelle', 'bella-@leetcode.com'], [4, 'Sally', 'sally.come@leetcode.com'], [5, 'Marwan', 'quarz#2020@leetcode.com'], [6, 'David', 'david69@gmail.com'], [7, 'Shapiro', '.shapo@leetcode.com']]
users = pd.DataFrame(data, columns=['user_id', 'name', 'mail']).astype({'user_id':'int64', 'name':'object', 'mail':'object'})

valid_emails(users)

Unnamed: 0,user_id,name,mail
0,1,Winston,winston@leetcode.com
2,3,Annabelle,bella-@leetcode.com
3,4,Sally,sally.come@leetcode.com


### Challenge No:9 **Patients with Condition**:
- https://leetcode.com/problems/patients-with-a-condition/description/?envType=study-plan-v2&envId=30-days-of-pandas&lang=pythondata
- Write a solution to find the patient_id, patient_name, and conditions of the patients who have Type I Diabetes. Type I Diabetes always starts with DIAB1 prefix. Return the result table in any order.



In [24]:
def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    patients = patients[patients["conditions"].str.contains(r"\bDIAB1")]
    return patients

In [25]:
data = [[1, 'Daniel', 'SADIAB100 COUGH'], [2, 'Alice', ''], [3, 'Bob', 'DIAB100 MYOP'], [4, 'George', 'ACNE DIAB100'], [5, 'Alain', 'DIAB201']]
patients = pd.DataFrame(data, columns=['patient_id', 'patient_name', 'conditions']).astype({'patient_id':'int64', 'patient_name':'object', 'conditions':'object'})
find_patients(patients)

Unnamed: 0,patient_id,patient_name,conditions
2,3,Bob,DIAB100 MYOP
3,4,George,ACNE DIAB100
