# From _main.ipynb_

##### Basic imports

In [4]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.naive_bayes import MultinomialNB
# from transformers import BertTokenizer, BertForSequenceClassification, AdamW
import pandas as pd
import numpy as np 

In [5]:
import pandas as pd

def encode_text_colum(df, text_column, vectorizer): 
    """
    Encodes a text column with the given vectorizer, drops the old column (with text),
    and returns the dataframe with the encoded text.

    Args:
        df (pd.DataFrame): The dataframe.
        text_column (str): The name of the text column to be encoded.
        vectorizer: The text vectorizer.

    Returns:
        pd.DataFrame: The dataframe with the encoded text.
    """
    # Vectorize the text column
    df_vectorized = vectorizer.fit_transform(df[text_column])
    
    # Create a dataframe from the vectorized data
    df_encoded = pd.DataFrame(df_vectorized.toarray(), columns=[f"{text_column}_{i}" for i in range(df_vectorized.shape[1])])

    # Drop the original text column
    df.drop([text_column], axis=1, inplace=True)

    # Concatenate the original dataframe with the encoded text dataframe
    df = pd.concat([df, df_encoded], axis=1)

    return df

# Example usage:
# vectorizer = TfidfVectorizer(max_features=1000)
# X_train_ab = encode_text_column(X_train, 'abstract', vectorizer)
# X_test_ab = encode_text_column(X_test, 'abstract', vectorizer)
# X_train_de = encode_text_column(X_train_ab, 'description_text', vectorizer)
# X_test_de = encode_text_column(X_test_ab, 'description_text', vectorizer)


##### Loading the dataframe

In [6]:
df = pd.read_csv('data/modelready_220423.csv')

##### Preprocessing

In [32]:
# extract unique countries in the df
unique_values = set()
df['countries_in_family'].apply(lambda x: unique_values.update(x.strip("[]").replace("'", "").split())) 

# Create new columns for each unique value
for value in unique_values:
    # each country has a column (1 if the patent belong to the country 0 otherwise)
    df[value] = df['countries_in_family'].apply(lambda x: 1 if value in x else 0)


In [8]:
df = df[df.abstract.notna()].copy() # drop all samples without abstract

In [33]:
# encode company names
df['company_name_encoded'] = df.company_name.astype('category').cat.codes  # encode companies

# remove non-numeric columns
df_columns_dropped = df.drop(['publication_number', 'company_name', 'countries_in_family', 'publn_nr',
       'primary_cpc'], axis = 1)


# f0_ has the same value as commercialization, the other two shouldn't be used
df_columns_dropped = df_columns_dropped.drop(['f0_', 'centrality', 'similarity'], axis = 1)

In [34]:
### PROBLEMATIC CELL ###
# WHAT SHOULD I DO HERE ???????

# remove text as I can't compute min and max on it
text = df_columns_dropped[['abstract', 'description_text']] # putting them aside for later


df_columns_dropped.drop(['abstract', 'description_text'], axis=1, inplace=True)

print(f'missing values = {df_columns_dropped.isna().sum().sum()} ')# some missin values
df_no_missing = df_columns_dropped.fillna(df_columns_dropped.mean()).copy()
print(f'missing values after filling= {df_no_missing.isna().sum().sum()} ')

missing values = 88097 
missing values after filling= 0 


In [35]:
# putting text back in
#df_no_missing[['abstract', 'description_text']] = text  


# extracting what we'll try to predict
y = df_no_missing['commercialized']
df_no_missing.drop('commercialized', axis= 1, inplace=True)

# dropping columns where all the value are the same (min = max) they would be zero if I apply min max rescaling
min_eq_max = df_no_missing.columns[df_no_missing.min() == df_no_missing.max()].to_list()
df_clean = df_no_missing.drop(min_eq_max, axis=1)

# putting text back in
df_clean[['abstract', 'description_text']] = text 


In [36]:
# split the data
X_train, X_test, y_train, y_test = train_test_split(df_clean, y, test_size=0.20, random_state=42)

#rescale 
# scaler = StandardScaler()
# scaler.fit(X_train)
# X_train_scaled = scaler.transform(X_train)
# X_test_scaled = scaler.transform(X_test)

In [37]:
print(X_train.isna().sum().sum() / (X_train.shape[0] * X_train.shape[1]))

0.0


In [38]:
# bag of words for abstract
vectorizer = TfidfVectorizer(max_features=1000)  # Adjust 'max_features' as needed
X_train_ab = encode_text_colum(X_train, 'abstract', vectorizer)
X_test_ab = encode_text_colum(X_test, 'abstract', vectorizer)

In [39]:
print(X_train_ab.isna().sum().sum() / (X_train_ab.shape[0] * X_train_ab.shape[1]))
print(X_test_ab.isna().sum().sum() / (X_test_ab.shape[0] * X_test_ab.shape[1]))

0.25519205390011807
0.45341488277268094


In [40]:
# Replace NaN values with zeros in X_train_ab
X_train_ab.fillna(0, inplace=True)

# Replace NaN values with zeros in X_test_ab
X_test_ab.fillna(0, inplace=True)


In [41]:
print(X_train_ab.isna().sum().sum() / (X_train_ab.shape[0] * X_train_ab.shape[1]))
print(X_test_ab.isna().sum().sum() / (X_test_ab.shape[0] * X_test_ab.shape[1]))

0.0
0.0


In [42]:
# Train a Multinomial Naive Bayes classifier
naive_bayes_model = MultinomialNB()
naive_bayes_model.fit(X_train_ab, y_train)

# Make predictions on the test set
y_pred = naive_bayes_model.predict(X_test_ab)

# Evaluate the performance
accuracy = accuracy_score(y_test, y_pred)
classification_report_str = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report_str)

ValueError: could not convert string to float: 'FIELD OF THE INVENTION \n     The invention relates to an improved enteral nutritional formula and more particularly to infant formulas which contain ribo-nucleotide equivalents at a level of at least 10 mg/100 Kcal of formula and wherein the ribo-nucleotide components are at specific ratios. \n     BACKGROUND OF THE INVENTION \n     The composition of human milk serves as a valuable reference for improving infant formula. However, human milk contains living cells, hormones, active enzymes, immunoglobulins and components with unique molecular structures that cannot be replicated in infant formula. Unlike human milk, infant formula must remain stable on the shelf for up to thirty-six (36) months. These fundamental differences between human milk and infant formula often mandate differences in the composition to achieve similar clinical outcome. \n     Human milk has served as a valuable reference for improving infant formula. The investigation of human milk components has stimulated many investigations into what constituents may be added to infant formula. Greater knowledge of the composition of human milk affords the opportunity to design infant formulas that are closer to that of human milk. However, it becomes increasingly apparent that infant formula can never duplicate human milk. Many constituents in human milk are bioactive and because of synergies among these components there is little reason to believe that the same compound would have the same bioactivity in infant formula. The likelihood of this possibility is further diminished when the impact of heat treatment for sterilization and long-term storage of the formula is considered. The present invention is based, in part, on the concept of providing a formula which matches the performance of breast milk in most parameters without attempting to actually duplicate the delicate balance of human milk components. \n     The composition of human milk differs appreciably from that of other species and much attention has been paid to the various components. Several investigators have reported on the nucleotide content of milk from humans [Janas, L. M. et al: The Nucleotide Profile of Human Milk. Pediatr. Res. 16:659-662(1982) and Gil, A et al: Acid-soluble Nucleotides of Human Milk at Different Stages of Lactation. Journal of Dairy Research (1982), 49, 301-307.] The numerous publications cited in the Janas and Gil references also relate to the nucleotide composition of human milk and, in combination, leave one skilled in the art with a confused and conflicting understanding of the nucleotide composition of human milk. None of the prior art discloses the minimum level of nucleotide equivalents taught by the present invention nor the ratio of the four elements (adenosine, cytidine, guanosine and uridine) to each other. Most importantly the prior art does not suggest or disclose a formula that is superior to human milk in enhancing the immune response of a human. \n     Nucleosides are nucleotides minus the one to three phosphate groups. Nucleosides are a class of chemical compounds that are of importance in physiological and medical research. They may be obtained from the partial decomposition (hydrolysis) of nucleic acids. Nucleosides contain a purine or pyrimidine base linked to either d-ribose, forming ribosides, or d-deoxyribose, forming deoxyribosides. Nucleosides are nucleotides minus the phosphorus group. Representative of the nucleosides are adenosine, cytidine, guanosine, inosine and uridine. \n     Nucleotides (nucleosides plus at least one phosphate group) are the fundamental units of nucleic acids. The nucleotides found in nucleic acids are phosphate esters of the nucleosides. The term nucleotides is also sometimes applied to compounds not found in nucleic acids and which contain substances other than the usual purines and pyrimidines. The nucleotides inosine-5&#39;-monophosphate and guanosine-5&#39;-monophosphate are used as flavor potentiators. \n     Nucleotides are ubiquitous, low molecular weight compounds that participate in energy metabolism and modulation of enzymatic reactions. In addition, nucleotides are components of compounds that are crucial in the synthesis and catabolism of carbohydrates, lipids, protein, and nucleic acids. Clearly nucleotides and their metabolites are important determinants of numerous cellular processes. \n     Adequate cellular supplies of nucleotides in humans and animals are maintained by two pathways; the salvage pathway and de novo synthesis. The salvage pathway involves recovery of nucleotides and nucleosides liberated from metabolism (such as catabolized nucleic acids). De novo synthesis of nucleotides requires the precursors aspartate, glutamine, glycine, and carbamoyl phosphate. The salvage pathway generally supplies sufficient quantities of nucleotides even in tissue with rapidly proliferating cells, including enterocytes, erythrocytes and immune cells. It is also known that addition of nucleotides to the diet inhibits the de novo pathway and activates the salvage pathway in the liver and extrahepatic tissue, especially in enterocytes. \n     Dietary sources rich in nucleotides include meats, fish, legumes, and dairy products. Nucleotides are primarily present in polymeric forms (DNA, RNA and nucleoproteins) in these foods and are degraded by ribonucleases, deoxyribonucleases and proteases, yielding nucleotides. Subsequent action of phosphatases yields nucleosides which appear to be the preferred form for absorption. Some additional digestion to free purine and pyrimidine bases may occur. Studies have been published that indicate that a specific transport system(s) exists for the absorption of nucleosides and bases. \n     Most dietary nucleotides are degraded, excreted, or utilized before reaching the systemic circulation. Although dietary nucleotides appear to have little access to the systemic circulation, they have been implicated as having numerous systemic effects. Reports indicate that dietary nucleotides influence the response to sepsis, alter blood lipid profiles, enhance brain function, and increase iron absorption, gut mucosal growth, and gut bifidobacteria populations. \n     U.S. Pat. No. 3,231,385 discloses and claims an active phosphatase free cow&#39;s milk which contains at least two of the respective disodium salts of (a) cytidine 5&#39;-monophosphate in the amount of 10 to 20 mg/L of cow&#39;s milk, (b) guanosine 5&#39;monophosphate in the amount of 0.2 to 0.4 mg/L of cow&#39;s milk, (c) uridine 5&#39;-monophosphate in the amount of 1.2 to 1.4 mg/L of cow&#39;s milk, (d) guanosine 5&#39;-diphosphate in the amount of 0.4 to 0.6 mg/L of cow&#39;s milk, (e) uridine 5&#39;-diphosphate glucose in the amount of 0.5 to 1.0 mg/L of cow&#39;s milk, (f) uridine 5&#39;-diphosphate galactose in the amount of 0.5 to 1.0 mg/L of cow&#39;s milk and (g) uridine 5&#39;-diphosphate glucuronic acid in the amount of 1.0 to 3.0 mg/L of cow&#39;s milk. \n     U.S. Pat. No. 4,994,442 discloses and claims the addition of nucleosides and/or nucleotides to infant formula to provide a formula having enhanced physiological properties and methods of stimulation or repair of intestinal gut cells. This patent teaches and claims the use of at least one member selected from the group consisting of uridine, uridine phosphate, and mixtures thereof; guanosine, guanosine phosphate and mixtures thereof; adenosine, adenosine phosphate and mixtures thereof; cytidine, cytidine phosphate and mixtures thereof; and inosine, inosine phosphate and mixtures thereof. This patent also claims a method for enhancing the immune response of T-cells and for providing specific fatty acid phospholipid profiles in red blood cell membranes of infants. This patent fails to suggest the use of the four specific ribo-nucleotides disclosed in the present invention. This reference also fails to suggest the specific ratios and levels of ribo-nucleotides used in this invention and the surprising results relating to the immune system and diarrhea that are achieved through the present invention. \n     U.S. Pat. No. 5,066,500 discloses a non-milk based infant formula comprising carbohydrates, a source of amino acids, vegetable oils, minerals, vitamins, wherein the formula contains at least one of uridine, uridine phosphate or mixtures thereof; guanosine, guanosine phosphate or mixtures thereof; or adenosine, adenosine phosphate, or mixtures thereof; cytidine, cytidine phosphate, or mixtures thereof, or inosine, inosine phosphate, or mixtures thereof. This patent fails to disclose the four specific ribo-nucleotides utilized in the instant invention, the levels and ratios of those nucleotides in an enteral nutritional formula and the surprising results that can be obtained through the use of the instant invention. \n     U.S. Pat. No. 4,544,559 discloses and claims a nucleotide enriched humanized milk in powder form. The inventive aspect of this patent relates to the use of five (5) nucleotides in the precise ratios as follows: adenosine mono-phosphate (AMP) 1.32 mg/100 g, cytidine-monophosphate (CMP) 1.12 mg/100 g, guanosine-monophosphate (GMP) 1.49 mg/100 g, uridine-monophosphate (UMP) 3.42 mg/100 g and inosine-monophosphate (IMP) 0.45 mg/100 g of powdered formula. In contrast, the present invention only uses four (4) ribo-nucleotide equivalents, (as will be defined infra) those being represented by: cytidine 5&#39;-monophosphate (CMP), uridine 5&#39;monophosphate (UMP) guanosine 5&#39;monophosphate (GMP) and adenosine 5&#39;monophosphate (AMP). Also critical to the instant invention, is that these four ribo-nucleotides (or nucleotide equivalents) be present in the enteral formulation at a level of at least 10 milligrams of nucleotide equivalents per 100 Kcal of enteral formula. An even more specific aspect of the present invention, which sets it apart from the prior art is the requirement that the weight ratio of CMP to UMP be at least 1.5:1; that the ratio of CMP to AMP be at least 2:1; and the ratio of CMP to GMP be at least 1.75:1 (on a nucleotide equivalent basis.) \n     U.K. Patent 2,216,416 discloses a method of stimulating the immune function with the aid of a nucleobase source, the use of nucleobase sources for immuno stimulation and compositions comprising such nucleobase sources. Specifically, this patent relates to the administration of from 0.1 to 75 grams of RNA, DNA, nucleotides or nucleosides per day or an amount equivalent thereto in nucleobase form. This reference fails to suggest or disclose the specific benefits that can be realized through the use of four ribo-nucleotides at specific levels and ratios. \n     The enteral formula of the instant invention provides a positive advantage to the infant. The clinical studies which were conducted evidence the unexpected advantages of the instant invention. An additional aspect of the present invention is the overall balance of nutrient interactions and bio-availability, which provide an improved nutritional product. Another aspect of the present invention relates to an infant formula which meets the requirements of the Infant Formula Act and to methods for its production and analytical techniques for the determination of nucleotide equivalents. \n     Several investigators have reported that maternal milk contains factors that protect against diarrhea. These investigators have also reported that nucleotide-enriched formula have an effect on the incidence, duration and etiology of acute diarrhea. These investigators have failed to discover the specific nucleotides and ratios of the instant invention that are effacious in the treatment/prevention of diarrhea. \n     There has been much interest in this area of enteral nutritional formulations. The prior art is replete with various formulations using various ingredients. The general principle of adding RNA, DNA, nucleotides, nucleosides and/or nucleobases to food products is disclosed within the prior art. However, none of the prior art either taken individually or in any combination would suggest or predict, with any level of certainty, the discoveries the Applicants have made herein. \n     DISCLOSURE OF THE INVENTION \n     The term &#34;nucleotide equivalents&#34; as used herein means the total adenosine (A), cytidine (C), guanosine (G) and uridine (U) content present in any one or more of the following forms: ribo-nucleosides, ribo-nucleotides, RNA, phosphate esters and d-ribose adducts of adenosine (A), cytidine (C), guanosine (G), and uridine (U). The various forms of A,C,G, and U are determined, calculated and expressed as the monophosphate esters; adenosine monophosphate (AMP), cytidine monophosphate (CMP), guanosine monophosphate (GMP) and uridine monophosphate (UMP). These are the free acid forms of the monophosphate esters as opposed to the salt forms such as the mono or disodium salts. Some nucleotides are often sold as the sodium salts. For example, the sum of adenosine from RNA, the mono-, di- and triphosphate esters and the d-ribose adducts are stated as the nucleotide equivalent of the mono phosphate ester of adenosine. This invention relates only to the use of ribo-nucleotides and does not contemplate or claim the use of the deoxy form. \n     There is disclosed an enteral formula, said formula comprising: 1) protein, said protein being of a concentration of between 10 and 35 grams per liter of formula; (2) fat, said fat being of a concentration of between 20 and 45 grams per liter of formula; (3) carbohydrates, said carbohydrates being of a concentration of between 60 and 110 grams per liter of formula; and (4) at least 10 mg of nucleotide equivalents per 100 Kcal of formula, said nucleotide equivalents consisting of RNA; mono-, di-, and triphosphate esters of adenosine, cytidine, guanosine and uridine, and the d-ribose adjuncts thereof; and wherein the weight ratio of CMP:UMP is at least 1.5:1; of CMP: AMP is at least 2:1; and of CMP:GMP is at least 1.75:1. \n     The minimum level of nucleotide equivalents in this invention is 10 mg per 100 Kcal of formula or 70 mg per liter of a formula having a caloric density of about 687 Kcal per liter. Levels of nucleotide equivalents at the claimed ratios above about 1.0 gms per liter of formula or about 100 mg/100 Kcal of formula is outside the scope of this invention. From this maximum level of about 100 mg/100 Kcal of formula or 1.0 g/liter of formula and the ratios set out above, the upper limits for each of the four-ribo-nucleotide components may be calculated. \n     There is also disclosed an enteral formula wherein the source of protein is selected from the group comprising condensed skim milk, non-fat milk, acid whey and cheese whey. In general, any appropriate source of protein can be used in this invention, including hydrolyzed proteins. There is further disclosed an enteral formula wherein the protein is 50-70% by weight condensed skim milk or non-fat milk and the fat is selected from the group consisting of soy oil, coconut oil, corn oil, high oleic safflower oil, marine oils, egg yolk oils, high oleic sunflower oils, fungal oils and mixtures thereof. \n     There is also disclosed an infant formula which comprises a nutritionally adequate source of amino nitrogen, carbohydrates, edible fats, minerals and vitamins; the improvement characterized in a composition comprising at least one member selected from each of the groups (a), (b), (c), and (d): \n     (a) uridine, uridine phosphates and mixtures thereof; \n     (b) guanosine, guanosine phosphates and mixtures thereof; \n     (c) adenosine, adenosine phosphate and mixtures thereof; and \n     (d) cytidine, cytidine phosphate and mixtures thereof; wherein the total amount of the composition is at least 10 mg per 100 Kcal of formula and wherein the weight ratio of CMP:UMP is at least 1.5:1 of CMP:AMP is at least 2:1; and of CMP:GMP is at least 1.75:1. \n     There is also disclosed an infant formula the improvement comprising adding from 29 to 39 mg of CMP per liter of formula; 15 to 20 mg of UMP per liter of formula; 10 to 15 mg of AMP per liter of formula and 14 to 20 mg of GMP per liter of formula. \n     More specifically there is disclosed a formula wherein the protein is 50-70% condensed skim milk or non-fat milk and 30-50% cheese whey by weight and the fat has as its source soy, coconut and high oleic safflower oil. \n     The enteral formula according to the invention provides a source of carbohydrates selected from sucrose, corn syrup, glucose polymers and other carbohydrate sources. The formula may also contain dietary fiber. The teachings of U.S. Pat. No. 5,021,245 are incorporated herein by reference. \n     This invention also relates to a method of enhancing the immune system of a human, said method consists of feeding a human in need of treatment a formula, the improvement comprising a formula consisting essentially of: 1) protein, said protein being of a concentration of between 10 and 35 grams per liter of formula; 2) fat, said fat being of a concentration of between 20 and 45 grams per liter of formula; 3) carbohydrates, said carbohydrates being of a concentration of between 60 and 110 grams per liter of formula; and 4) at least 10 mg of nucleotide equivalents per 100 Kcal of formula and wherein the weight ratio of CMP:UMP is at least 1.5:1; of CMP:AMP is at least 2:1; and of CMP:GMP is at least 1.75:1. There is also disclosed a novel method of producing an enteral formula containing nucleotides and its use to treat or prevent diarrheal disease and to novel analytical techniques. \n     There is also disclosed a method for manufacturing infant formula, said method comprising the steps of: 1) dispersing an appropriate quantity of protein in water or oil sufficient to solubilize or suspend the protein, thereby forming a protein solution; 2) dissolving carbohydrates in water, thereby forming a carbohydrate solution; 3) mixing minerals in water or the carbohydrate solution, thereby forming a mineral solution or a mineral/carbohydrate solution; 4) combining appropriate quantities of said protein solution, said carbohydrate solution, said mineral solution, and a solution of oils containing oil soluble vitamins, 5) heat processing and homogenizing the combined solution; 6) adding water soluble vitamins, iron, choline and other nutrients to the combined solution; 7) adding water to dilute the combined solution to the desired caloric density, approximately 400-725 kcal per liter of formula and 8) adding from 29 to 39 mg of CMP per liter of formula; 15 to 21 mg of UMP per liter of formula; 10 to 16 mg of AMP per liter of formula and 14 to 20 mg of GMP per liter of formula directly to the batch or in the form of an aqueous solution. \n     As used herein, the terms CMP, UMP, GMP and AMP mean not only the monophosphates of adenosine, cytidine, guanosine and uridine but also their nucleotide equivalents which include polymeric RNA, ribo-nucleosides, ribo-nucleoside-containing adducts and di- and triphosphate ribo-nucleotides. \n     There is also disclosed a novel analytical technique that can quantify the various forms of the four nucleotides in complex food matrices. The analytical process comprises: 1) the enzymatic digestion of polymeric RNA to nucleotide 2) the enzymatic co-digestion of nucleoside containing adducts to nucleosides and the nucleotides to nucleosides 3) the covalent attachment of the nucleosides to boronic acid that has been immobilized on a polyacrylamide gel 4) the release of the nucleosides from the boronate derivatized polyacrylamide gel via a pH shift 5) the separation of the nucleosides via low pH reverse phase/ion pairing HPLC using octane sulfonate as the ion-pairing agent and 6) quantitation of the nucleoside via U.V. absorbance using external standards or other means known in the art of analytical chemistry. \n     There is also disclosed a novel antioxidant system that is used in the enteral formulas according to this invention. The antioxidant system consists of β-carotene, R,R,R, α-tocopherol and selenium. The level of R,R,R, α-tocopherol can range from 10 to 30 IU per liter of formula. The level of β-carotene can range from 375 to 575 μg per liter of formula and the level of selenium can range from 14 to 32 mcg per liter of formula. The selenium used in this aspect of the invention may be delivered in the form of selenate. The teachings of U.S. Pat. No. 5,221,545 are herein incorporated by reference. \n     In actual use, the formula of this invention may be consumed by any infant and should be in compliance with accepted levels of vitamins, minerals, micro-components and the like. The amount consumed does not differ from that associated with the normal consumption of commercially available infant formula. \n     A representative formula for the enteral nutritional product of the invention is set forth in Table I. \n     \n                       TABLE I                                                     \n______________________________________                                    \nFORMULA ACCORDING TO THE INVENTION                                        \nNutrient      Concentration per liter of formula                          \n______________________________________                                    \nProtein       13.0-20 g                                                   \nProtein Source                                                            \nCSM.sup.1     55-75%                                                      \n              7.15-15 g                                                   \nWPC.sup.2     25-45%                                                      \n              3.25-9.0 g                                                  \nLipid         13-21 g                                                     \nH.O. Safflower Oil                                                        \n              35-55%                                                      \nSoy Oil       20-40%                                                      \nCoconut Oil   20-45%                                                      \nCarbohydrate lactose                                                      \n              70-110 g                                                    \nNucleotides   70-100 mg                                                   \nCMP           29-39 mg                                                    \nUMP           15-21 mg                                                    \nAMP           10-16 mg                                                    \nGMP           14-20 mg                                                    \nIron           8-16 mg                                                    \nR,R,R,α 10-30 IU                                                    \ntocopherol                                                                \nβCarotene                                                            \n              375-575 μg                                               \nSelenium      14-32 mcg                                                   \nCalcium       475-850 mg                                                  \nPhosphorus    240-700 mg                                                  \nCa:P Ratio    1.4 to 2.4                                                  \n______________________________________                                    \n .sup.1 CSM is Condensed Skim Milk                                        \n .sup.2 WPC is Whey Protein Concentrate                                   \n \n    \n     The pediatric nutritional formula of this invention is generally prepared using the following method. An appropriate quantity of protein is dispersed in sufficient water or oil to solubilize or suspend it, thereby forming a protein solution/suspension. Typically this protein source would be intact milk proteins and/or hydrolyzed milk proteins. A carbohydrate source such as one or more of corn syrup solids, lactose maltodextrins and sucrose is dissolved in water, thereby forming a carbohydrate solution. A source of dietary fiber, such as soy polysaccharide, may also be added. Appropriate minerals are dissolved in water, the carbohydrate solution or oil, so as to form a mineral solution. \n     Once formed, the three solutions (protein, carbohydrate, and mineral) are combined in appropriate quantities with oils and oil soluble vitamins. This resulting solution is then heat processed and homogenized. Following processing, water soluble vitamins, iron, choline and other nutrients are added and then the nucleotides are added. The solution is then diluted with water to the appropriate caloric density, approximately 670-725 kcal per liter of formula. The formula is then dispensed into containers and retorted to obtain commercial sterility or packaged aseptically using commercially available techniques and equipment. As prepared, the formula contains appropriate nutrients in compliance with the Infant Formula Act as of the date of this application. It should also be recognized that the unique formula of this invention could be prepared for use in powdered form or as a concentrated liquid. \n    \n    \n     The invention will be better understood in view of the following examples, which are illustrative only and should not be construed as limiting the invention. \n     DETAILED DESCRIPTION OF THE INVENTION \n     Analytical Techniques \n     One feature of the instant invention resides in the novel analytical technique used to identify and quantify the nucleotide equivalents useful in this invention. Analysis of certain starting materials, especially the protein, will determine the actual amount of nucleotides to be added. This analysis of the raw materials of the formula is critical to determine what nucleotides, if any, are contained in the starting materials. The analytical method is also critical to determine the proper ratios of the nucleotides to each other. The analytical method according to this invention will determine nucleotide equivalent levels in complex food matrices. The method in general exploits the enzymatic digestion of various forms of ribonucleic acids to the simple monomeric ribonucleosides and the ability of the cis-diol groups of ribonucleosides to form a pH dependent covalent complex with boronic acid. Boronate derivatized polyacryl amide gel is used to very selectively prefractionate ribonucleoside directly from complex matrices. The isolated ribonucleosides are subsequently separated via low pH reverse phase/ion-pairing HPLC using octanesulfonate as the ion-pairing agent. Ribonucleosides are detected via UV absorbance, and the corresponding levels are determined by comparison to external standards. The method can be used to quantitate inherent levels of ribonucleoside in foods. Because of the selective prefractionation, the method is essentially matrix independent. It should be understood that the novel analytical technique of this invention will not detect nucleosides from DNA or any form of nucleic acid that does not contain the cis-diol groups of ribose. It has been used to determine ribonucleic acid types and levels in infant and medical nutritional products, human milk, protein commodities, and clinical and commercial animal chows. \n     The following is an example of the analytical technique of this invention that can be used to determine the presence and ratios of the nucleotide equivalents. \n     EXAMPLE I \n     Analysis of Similac® with Iron \n     To a 10 ml Reacti-Therm vial with stir bar was placed 2.0 ml of Similac® with Iron (a non-fat milk protein infant formula produced by the Ross Products Division of Abbott Laboratories, ready to feed form, 676 Kcal per liter) 3.0 ml of 50 mM sodium acetate at pH 5.1, 50 μl of 10 mM zinc sulfate and 50 μl of the enzyme preparation nuclease P1 (Sigma Chemical). The enzyme preparation was 5 mg of dry enzyme powder, as received from Sigma, and 4 ml of 50 μM sodium acetate at pH 5.1. The mixture was heated to 37° C. and stirred for 16 hours. This reaction converted the polymeric RNA to monomeric 5&#39;mono-nucleotides. \n     To the same reaction vial was added 50 μl of 30% ammonium hydroxide, 1 ml of 0.5M ammonium acetate (pH 8.75), 50 μl of 1.0M magnesium chloride, 50 μl of bacterial alkaline phosphatase (BAP) (Sigma Chemical as a suspension) and 50 μl of a nucleotide pyrophosphatase enzyme preparation (Sigma Chemical). The pyrophosphatase enzyme preparation was 5 mg of dry powder in 4 ml of 0.5M ammonium acetate buffer. The mixture was incubated at 37° C. for three hours. This reaction converted the nucleoside containing adducts and the nucleotides to the ribonucleosides. \n     The reaction mixture was transferred to a 50 ml volumetric flask using 25 ml of 0.5M sodium phosphate, pH 10.5. Water was added to a final volume of 50 ml. The sample mixture was shaken and may be filtered to remove insoluble protein. \n     5 grams of dried Affi-Gel-601, boronate derivatized (from Bio-Rad) was hydrated in 50 ml of 100 mM phosphate buffer at pH 6.5. To a 10 ml open column was added the hydrated Affi-Gel-601 to obtain a packed volume of about 1 ml. The gel was converted to the basic form by washing with 5 ml aliquots of 0.25M sodium phosphate buffer, pH 10.5, until the gel no longer swelled. The gel was now about 2 ml in volume. The gel was resuspended in the buffer to maintain adequate flow. \n     To the prepared gel was added 10 ml of the sample that was previously treated with the enzymes and the eluant was discarded. At this point, the nucleosides are covalently attached, through the cis-diol groups, to the boronic acid gel. The gel was washed with 20 ml of 0.25M sodium phosphate, pH 10.5 and the eluant was discarded. The nucleosides were eluted and collected in a 10 ml volumetric flask by adding 2 ml of 1.0M phosphoric acid to the column followed by 5 ml of 0.1M phosphoric acid. At this point the nucleosides have been isolated from the sample and are now ready to be characterized. \n     The volumetric flask was brought to a final volume of 10 ml with water. The sample was then placed on a HPLC for separation and quantification of nucleosides using external standards. The nucleosides were separated via low pH, reverse phase, ion pairing chromatography using an acetonitrile gradient. The nucleosides were detected by U.V. absorbance at 260 nm and 280 nm. Nucleosides were quantified by reference to external standards and the results were converted to the corresponding monophosphate nucleotide value by multiplying the nucleoside value by the molecular weight ratio of the monophosphate nucleotide over the nucleoside. The results were expressed as mg/L, of mononucleotide. \n     \n         ______________________________________                                    \nNUCLEOTIDES IN SIMILAC ® WITH IRON                                    \n______________________________________                                    \n         uridine - 3-5                                                    \n         guanosine - trace                                                \n         adenosine - trace                                                \n         inosine - (trace, &lt;0.5 ppm)                                      \n         cytidine - 1-3                                                   \n______________________________________                                    \n \n    \n     It should be noted that some samples have been found to be active with respect to nucleic acid degradation. Of particular concern is the enzymatic conversion of AMP to IMP. Heat inactivation has proven to be effective in rendering the sample inactive. The procedure for heat inactivation is to heat the sample to over 100° C. for at least 15 minutes. After the sample has cooled, buffer, enzyme, and zinc are added and the first hydrolysis is carried out. \n     This analytical technique was used on raw materials to determine base line nucleotide content and on final clinical product to confirm the presence and concentration of the four nucleotides used in the invention. \n     EXAMPLE II \n     Preparation of Enteral Formula \n     On a commercial scale, a control and an experimental formula according to the invention were prepared having the compositions set forth in Table II. The two formula are as close as possible to being identical except for the nucleotide components. \n     \n                       TABLE II                                                    \n______________________________________                                    \nCOMPOSITION OF STUDY FEEDINGS                                             \n            per liter                                                     \n              CON       NUC                                               \nNutrient      (Control) (Formula of the Invention)                        \n______________________________________                                    \nProtein, g    14.0      14.4                                              \nFat, g        36.5      38.3                                              \nCarbohydrate, g                                                           \n              77.1      75.5                                              \nCalcium, mg   544.4     532.5                                             \nPhosphorus, mg                                                            \n              295.0     316.2                                             \nMagnesium, mg 73.5      77.7                                              \nSodium, mg    170.1     179.2                                             \nPotassium, mg 931       948.6                                             \nChloride, mg  487.7     493.2                                             \nIron, mg      14.0      14.0                                              \nZinc, mg      5.1       5.1                                               \nCopper, mcg   608       608                                               \nIodine, mcg   61        61                                                \nManganese, mcg                                                            \n              34        34                                                \nVitamin A, IU 2930      2970                                              \nVitamin D, IU 405       405                                               \nVitamin E, IU 24.6      24.8                                              \nVitamin K, mcg                                                            \n              54        54                                                \nVitamin C, mg 170       172                                               \nβ-Carotene mcg                                                       \n              450       450                                               \nSelenium, mcg 23        23                                                \nThiamin, mcg  1350      1360                                              \nRiboflavin, mcg                                                           \n              1014      1014                                              \nPyridoxine, mcg                                                           \n              480       480                                               \nVitamin B.sub.12, mcg                                                     \n              1.7       1.7                                               \nNiacin, mcg   7095      7095                                              \nFolic acid, mcg                                                           \n              101       101                                               \nPantothenic acid, mcg                                                     \n              3041      3041                                              \nBiotin, mcg   30        30                                                \nTaurine, mg   45        45                                                \nCholine, mg   108       108                                               \nInositol, mg  32        32                                                \nEnergy, Kcal  676       676                                               \nCMP, mg       2.72*     31.2                                              \nUMP, mg       4.19*     17.7                                              \nAMP, mg       0.57*     9.8                                               \nGMP, mg       0.45*     14.4                                              \n______________________________________                                    \n * -- inherent levels from raw materials                                  \n \n    \n     In this example, a 7711 Kg batch of the formula according to the invention was prepared (NUC). The control formula (CON) was prepared in a similar fashion except the addition of the nucleotides was omitted. The list of ingredients and amounts are found in Table III. \n     \n                       TABLE III                                                   \n______________________________________                                    \nIngredients and Amounts for NUC Formula                                   \nIngredient              Amount                                            \n______________________________________                                    \nHigh Oleic Safflower Oil                                                  \n                        120.2 Kg                                          \nCoconut Oil             85.7 Kg                                           \nSoy Oil                 80.3 Kg                                           \nLecithin                2.92 Kg                                           \nMono- and diglyceride   2.92 Kg                                           \nOil Soluble Vit. Premix 0.365 Kg                                          \nβCarotene          0.0137 Kg                                         \nCarrageenan             1.43 Kg                                           \nWhey Protein Concentrate                                                  \n                        61.2 Kg                                           \nLactose                 476.3 Kg                                          \nPotassium Citrate       4.6 Kg                                            \nMagnesium Chloride      0.735 Kg                                          \nLow Heat Condensed      821 Kg                                            \nSkim Milk                                                                 \nCalcium Carbonate       3.36 Kg                                           \nFerrous sulfate         0.450 Kg                                          \nWater Soluble Vitamin   1.11 Kg                                           \nPremix Trace Minerals/                                                    \nTaurine                                                                   \nCholine Chloride        0.600 Kg                                          \nAdenosine 5&#39;monophosphate                                                 \n                        0.113 Kg                                          \nGuanosine 5&#39;monophosphate-Na2                                             \n                        0.197 Kg                                          \nCytidine 5&#39;monophosphate                                                  \n                        0.259 Kg                                          \nUridine 5&#39;monophosphate-Na2                                               \n                        0.216 Kg                                          \nAscorbic Acid           1.78 Kg                                           \n45% KOH                 2.36 Kg                                           \nTotal Yield             7711 Kg                                           \n______________________________________                                    \n \n    \n     The first step is the preparation of the oil blend. To an appropriately sized blend tank with agitation and heating soy oil, coconut oil and high oleic safflower oil were added. The mixture was heated to 73.8°-79.4° C. The lecithin and mono-and diglycerides (Myverol 18-06) were added to the blend tank with agitation. The oil soluble vitamin premix was added with agitation. The premix container was rinsed with the oil blend and transferred back to the blend tank to ensure complete delivery of the vitamin premix. The beta-carotene was added to the oil blend and the mixture agitated until the components were well dispersed. The beta-carotene container was rinsed with the oil blend and the contents returned to the blend tank to ensure complete delivery of the beta-carotene solution. Lastly, the carrageenan was added to the oil blend and the mixture was agitated and held at 54.4°-60° C. until used. \n     The carbohydrate, mineral and CSM (condensed skim milk) protein slurry was prepared next. To water heated to 68.3°-73.8° C. the lactose was added and the mixture agitated until the lactose was well dissolved. Potassium citrate was then added followed by potassium chloride, sodium chloride and magnesium chloride. The condensed skim milk (CSM) was then added. Tri-calcium phosphate was added, the mixture agitated and held at 54.5°-60° C. until used. \n     The protein-in-water (PIW) slurry was then prepared. The whey protein concentrate was added to water at 54.5°-60° C. under mild agitation. The PIW slurry was held under mild agitation until needed. Also contemplated in this invention is the use of protein-in-fat (PIF) slurries, wherein an appropriate amount of protein is admixed with all or a portion of the oil component. \n     The PIW slurry was then added to the prepared oil blend. The required amount of the carbohydrate, mineral and CSM slurry was then added to the oil blend. The pH of the mixture was then determined and if below specification it was adjusted using KOH to a pH of 6.75 to 6.85. The mixture was then held at 54.4°-60° C. under agitation for at least 15 minutes. \n     The mixture was then heated to 68.3°-73.8° C. and deaerated under vacuum. The mixture was then emulsified through a single stage homogenizer at 6.21 to 7.58 MPa. \n     After emulsification, the mixture was heated to 120°-122° C. for 10 seconds and then 149°-150° C. for 5 seconds. The mixture was then passed through a flash cooler to reduce the temperature to 120°-122° C. and then through a plate cooler to reduce the temperature to 71.1°-79.4° C. The mixture was then passed through a two stage homogenizer at 26.89 to 28.27 MPa and 2.76 to 4.14 MPa. The mixture was held at 73.9° to 83.2° C. for 16 seconds and then cooled to 1.1° to 6.7° C. At this point, samples are taken for microbiological and analytical testing. The mixture was held under agitation. \n     A calcium carbonate solution may be prepared for use in adjusting the calcium level of the mixture if outside of specification. \n     A vitamin stock solution was prepared. To water heated to 37.8° to 65.6° C. was added potassium citrate and ferrous sulfate. The vitamin premix was then added and the mixture agitated. The choline chloride was added and then the required amount of this vitamin mixture was added to the batch. \n     The nucleotide solution was then prepared. The following nucleotides were added to water with mild agitation in the following order: AMP, GMP, CMP, UMP. Agitation was continued for about 10 minutes to dissolve the nucleotides. The nucleotide solution was then added to the batch. This is one critical aspect of the invention. It is extremely important that the nucleotides be added after the homogenizations and heat treatments. Numerous experiments have been conducted that have shown the addition of the nucleotides at any other point will result in degradation of the nucleotides and thereby change the specific levels and ratios as claimed. It is believed that AMP is converted to IMP through the presence of adenosine deaminase in the raw materials, especially the protein components. \n     Lastly, an ascorbic acid solution was prepared and added slowly to the batch with agitation for at least 10 minutes. Final dilution with water to meet specified levels of solids and caloric density was completed. The batch was then packaged in 32 ounce metal cans and sterilized using conventional technology. \n     EXAMPLE III \n     Clinical Study of Enteral Formula \n     The purpose of the clinical investigation was to determine the effect of a nucleotide-fortified formula according to the present invention on the development of the neonatal immune system in infants as measured by the antibody response to childhood vaccines. \n     This was a 12-month, randomized, controlled, blinded, multi-site trial of term infants. Infants enrolled into the study received human milk (HM) or one of two clinically labelled formulas: 1) control formula (CON) or 2) CON formula supplemented with nucleotides (NUC). The analyzed composition of each formula is set forth in Table II. A total of 311 infants completed the study (107 CON, 101 NUC, 103 HM). Infants followed the immunization schedule recommended by the American Academy of Pediatrics with single lots of Hib TITER® Hemophilus influenzae type b conjugate vaccine (Diphtheria CRM 197 and tetanus protein conjugate sold by Lederle, Inc.) and Diphtheria and Tetanus Toxoids and Pertussis Vaccine Adsorbed, sold by Lederle, Inc. Infants were full-term with a gestational age of 38-42 weeks, at or above the 5th percentile for weight, length, and head circumference and were enrolled between 2 and 10 days of age. All subjects were healthy with no indication of systemic disease and did not receive any medications, mineral, or vitamin supplements. \n     The primary outcome variable investigated was vaccine response at 6, 7, and 12 months of age. Also investigated were differential white blood cell count, lymphocyte subset analysis, NK activity, and lymphoblast transformation in response to specific and non-specific stimuli at 2, 6, 7, and 12 months of age. Secondary outcome variables included intake, anthropometry, and indices of tolerance (stool characteristics and incidence of spit-up). \n     Also investigated was the antioxidant status of infants fed the formula according to the present invention which contained the novel antioxidant system of: 10-30 IU of R, R, R, α-tocopherol per liter of formula, 375-575 μg of β-carotene per liter of formula and 14-32 mcg of selenium per liter of formula. \n     During infancy, as in adulthood, the body has a number of antioxidant systems to protect against injury from free radicals, the products of oxidation. The antioxidant system of this invention was clinically proven to promote the antioxidant status of the infant greater than currently available infant formula. This improved antioxidant status was demonstrated as a function of increased levels of plasma Vitamin E, reduced levels of plasma lipid peroxides, and increased free radical trapping capacity. \n     Experimental Design \n     At 2, 4, and 6 months of age DPT and Hib vaccines were administered. Blood samples were obtained by venipuncture at 2, 6, 7, and 12 months of age. When vaccines were administered the blood sample was obtained before the inoculation. Parents of the infants agreed to feed the infant only study formula until 4 to 6 months of age when table foods were added to supplement the study formula. The HM fed group were exclusively breast fed up to 2 months of age and a mixture of HM and Similac® with Iron (Ross Products Division of Abbott Laboratories) after 2 months, if necessary. \n     Weight, length and head circumference were measured at 21 days of age and at 2, 4, 6, 7, and 12 months of age. Three-day records of formula intake, frequency of spit-up and vomiting and the frequency, color and consistency of stools were used to assess tolerance. Blood samples (2 mL) were drawn at 2 months of age and transferred directly into a heparin-containing tube, and gently inverted. At 4, 6, 7, and 12 months of age 5 mL of blood was collected. Two and a half mL were transferred to heparinized tubes and 2.5 ml to a plain tube without an anti-coagulant. Tubes of blood were carefully packed in thermally insulated containers and shipped to the laboratory for analysis. \n     Radial immunodiffusion assays were performed using standard kits purchased from The Binding Site, Inc (5889 Oberlin Drive, Suite 101, San Diego, Calif. 92121) for the measurement of serum or plasma IgG and IgA. \n     The detection of tetanus and diphtheria IgG was accomplished as follows. Tetanus toxoid antigen (Connaught) was diluted in 0.05M carbonate buffer (pH 9.6) to 2 μg/mL, added to the wells of microtiter plates at 200 μL per well, and incubated at room temperature for 1 hour. Diphtheria toxoid antigen (Connaught) was diluted in the same manner to 15 μg/mL. The coated plates were washed three times in PBS containing 0.05% chicken egg albumin and 0.1% Tween 20. Samples and positive control tetanus and diphtheria toxoid immune globulin, were diluted in PBS/albumin/Tween, added to triplicate wells at 200 μL/well, and incubated at room temperature for 1 hour. PBS alone was also added to triplicate wells to provide a blank. Plates were again washed three times in PBS/albumin/Tween. Affinity purified horseradish peroxidase-conjugated goat anti-human IgG (The Binding Site, Inc) was diluted in PBS/albumin/Tween, added to the microtiter plates, and again incubated at room temperature for 1 hour. Tetramethylbenzidine (TMB) substrate (Kirkegaard and Perry Laboratories) was added to all wells at 100 μL/well, and incubated at room temperature for 10 minutes. The substrate reaction was stopped by adding 100 μL of 1M phosphoric acid per well. Optical density of each well was measured using a wavelength of 450 nm. Sample units were calculated based on the tetanus and diphtheria toxoid immune globulin standards. See Sedgurch and Bolton; J Clin Microbiol. 1983;18: 104-109. \n     Serum IgG directed against Haemophilus influenzae type b capsular polysaccharide (Hib) antigen was detected using a modified version of the procedure described by Anthony et al; J Clin Microbiol 1982;16:350-354. The modifications are described in Granoff, et al; J Infect Dis 1986;154:257-264. \n     Concentrations of total serum antibody to the Hib antigen were measured by a radioactive antigen-binding assay (Hib Farr) using the procedure described by Granoff et al; J Infect Dis 1986;154:257-264. The Hib antigen was purified and labeled with iodine. A reference serum pool from the US Bureau of Biologics (Rockville, Md.) was used to standardize the assay. The smallest amount of immunoglobulin detectable was 0.025 μg/mL serum, as determined with this reference pool. \n     Natural killer cell (NK cell) activity was measured using Histopaque-purified peripheral blood lymphocytes. The cytotoxicity of the NK cells was measured using procedure described by Wierda et al., J Immunol. Methods 1989;122:15-24. \n     Statistical Methods \n     The immunological variables were analyzed in two different ways. For the variables relating directly to vaccine response (Hib Farr, Hib IgG, tetanus, diphtheria, total IgG and IgA) the variables were transformed by taking logarithm base 10 and doing Analysis of Variences (ANOVAs). The procedure is commonly used in the vaccine literature. \n     Anthropometric data were analyzed for each gender separately. Analysis of Varience (ANOVA) was done at birth, initial visit, 2, 4, 6, 7, and 12 months of age for weight, length and head circumference. Weight gain, length gain and head circumference gain were also analyzed by ANOVAs. Intake data were ranked and analyzed by ANOVAs (number of feedings, volume intake, percent of feedings with spit-up, vomits or both). Stool variables were ranked and analyzed with ANOVAs (number of stools, mean rank consistency and percent of stools with gas or unusual odor). \n     RESULTS \n     Substantial amounts of data were collected on each of the 311 infants enrolled in this clinical investigation. Disclosure of all this information is outside the scope of this document, however, the following is a summary of the information that supports the novel and unobvious features of the instant invention. \n     Vaccine antibody response data was statistically analyzed by two methods. Table IV shows the medians of the variables in the original units. The ANOVA was performed on medians of ranked data. Table V shows geometric means. For this analysis, the variables were transformed by taking logarithm base 10, and the ANOVA compared the mean of the logs. The mean of the logs converted back to the original units is the geometric mean. Use of geometric means is commonly used in the vaccine literature. \n     At 7 months of age, infants in the NUC group had a higher antibody (P&lt;0.05) response than the CON or HM group to Hib vaccine (geometric mean of 7.24 vs 4.05 or 4.21 μG Ig/mL, respectively by the Hib Farr assay). The NUC group had a higher response than the HM group to diphtheria toxoid vaccine (geometric means of 1.77 vs 1.29 U diphtheria toxoid specific IgG/mL, respectively). The enhanced antibody response to Hib vaccine persisted through 12 months of age as seen in Table V. \n     There were no differences in NK activity at any time, and the differential white count, lymphocyte subsets, and lymphoblast transformation was very similar among all groups. The primary differences were at 12 months of age, when infants fed HM had more white blood cells, monocytes, lymphocytes, CD3, and CD19 cells than CON (P&lt;0.05). The NUC group was intermediate and not statistically different. Infants fed HM had greater numbers of NK cells (CD3-, CD16+, CD56+) than formula-fed (CON or NUC) infants (P&lt;0.05). The NUC group had a higher percent CD4 cells than HM-fed infants (P&lt;0.05) throughout the study. \n     Growth of infants was similar in all three groups. Tolerance and intake was similar for the two formula groups. \n     The similarity in growth and tolerance among all infants demonstrated that both formulas are acceptable. Likewise, the similarity in measures of immune system components among infants fed formulas or HM demonstrates that all feedings promote development of the immune system within normal ranges, however, for the first time an immune enhancement as measured by vaccine response to H. influenzae b and diphtheria toxoid is reported for infants consuming infant formula (NUC). \n     The consistently enhanced vaccine response of infants fed NUC vs CON suggests that nucleotides play an important function in immunological development of the infant. \n     DETAILED DISCUSSION OF RESULTS \n     Immunological Parameters \n     Vaccine response data are provided in Table IV as reported from the assays and Table V as geometric means. The antibody response to the Hib vaccine was measured as Hib Farr (μg Ig/mL). NUC-fed infants had significantly higher levels of Hib Farr antibody than infants fed HM at 6 months (0.43 vs 0.30, P&lt;0.05) higher than infants fed CON or HM at 7 months (7.7 vs 3.62 and 5.40, respectively, P&lt;0.05) and at 12 months (1.35 vs 0.68 and 0.82, respectively, P&lt;0.05). Hib response was also measured as Hib specific IgG, and the results paralleled the Hib Farr values at 6 and 7 months. This parameter was not measured at 12 months. \n     Response to the diphtheria vaccine was measured as diphtheria toxoid specific IgG. There were no differences between groups at 6 or 12 months, but at 7 months infants fed NUC had a significantly (P&lt;0.05) higher response (1.77 U/mL) than infants fed HM (1.29 U/mL). See Table V ). There were no differences at any time point for tetanus specific IgG. \n     \n                       TABLE IV                                                    \n______________________________________                                    \nVACCINE RESPONSE Median (n)                                               \n            NUC     CON        HM                                         \n______________________________________                                    \n6 months                                                                  \nHib Farr (μg Ig/mL).sup.1                                              \n              0.43 (93).sup.a                                             \n                        0.36 (96).sup.a,b                                 \n                                   0.30 (97).sup.b                        \nHib IgG (mg/mL)                                                           \n              0.06 (94).sup.a                                             \n                        0.06 (101).sup.a,b                                \n                                   0.03 (99).sup.b                        \nDiphtheria IgG (U/mL)                                                     \n              0.47 (78) 0.32 (85)  0.36 (80)                              \nTetanus (IgG (U/mL)                                                       \n              0.71 (80) 0.72 (82)  0.53 (80)                              \n7 months                                                                  \nHib Farr (μg Ig/mL)                                                    \n              7.70 (94).sup.a                                             \n                        3.62 (101).sup.b                                  \n                                   5.40 (99).sup.b                        \nHib IgG (mg/mL).sup.2                                                     \n              1.25 (93) 0.63 (101) 0.60 (97)                              \nDiphtheria IgG (U/mL)                                                     \n              0.70 (85).sup.a                                             \n                        1.53 (89).sup.a,b                                 \n                                   1.42 (90).sup.b                        \nTetanus IgG (U/mL)                                                        \n              5.01 (86) 4.47 (90)  4.75 (91)                              \n12 months                                                                 \nHib Farr (μg Ig/mL)                                                    \n              1.35 (89).sup.a                                             \n                        0.68 (94).sup.b                                   \n                                   0.82 (95).sup.b                        \nHib IgG (mg/mL)                                                           \n               ND.sup.3  ND         ND                                    \nDiphtheria IgG (U/mL)                                                     \n              0.30 (82) 0.24 (87)  0.30 (84)                              \nTetanus IgG (U/mL)                                                        \n              0.92 (83) 0.84 (87)  0.90 (85)                              \n______________________________________                                    \n .sup.1 Values in the same horizontal row with different superscripts (a o\n b) are significantly different, P &lt; 0.05.                                \n .sup.2 P &lt; 0.05, no pairwise differences                                 \n ND = not determined                                                      \n \n    \n     \n                       TABLE V                                                     \n______________________________________                                    \nVACCINE RESPONSE Geometric Mean (n).sup.1                                 \n            NUC     CON        HM                                         \n______________________________________                                    \n6 months                                                                  \nHib Farr (μg Ig/mL)                                                    \n              1.30 (93).sup.a                                             \n                        1.24 (96).sup.a,b                                 \n                                   1.23 (97).sup.b                        \nDiphtheria IgG (U/mL)                                                     \n              0.36 (78) 0.28 (85)  0.33 (80)                              \n7 months                                                                  \nHib Farr (μg Ig/mL)                                                    \n              7.24 (94).sup.a                                             \n                        4.05 (101).sup.b                                  \n                                   4.21 (99).sup.b                        \nDiphtheria IgG (U/mL)                                                     \n              1.77 (85).sup.a                                             \n                        1.38 (89).sup.a,b                                 \n                                   1.29 (90).sup.b                        \n12 months                                                                 \nHib Farr (μg Ig/mL)                                                    \n              1.41 (89).sup.a                                             \n                        0.76 (94).sup.b                                   \n                                   0.85 (95).sup.b                        \nDiphtheria IgG (U/mL)                                                     \n              0.33 (82) 0.25 (87)  0.27 (84)                              \n______________________________________                                    \n .sup.1 Values in the same horizontal row with different superscripts (a o\n b) are significantly different; P &lt; 0.05.                                \n .sup.2 P &lt; 0.05, no pairwise differences                                 \n \n    \n     It is generally accepted that a Hib FARR level of antibody greater than 1 μg of Ig/mL one month after immunization imparts protection to the infant. The percent of infants who had this level of protection was determined from the data set and is set forth in Table VI. The infants fed the NUC formula consistently had a 10% greater protection rate than infants in the other two groups. \n     \n                       TABLE VI                                                    \n______________________________________                                    \nHIB PROTECTION RATE                                                       \n(% of subjects with &gt; 1 μg anti-Hib Ig (mL)                            \n        NUC         CON     HM                                            \n______________________________________                                    \n 6 months 28%           18%     16%                                       \n 7 months 90%           80%     80%                                       \n12 months 55%           44%     45%                                       \n______________________________________                                    \n \n    \n     Natural killer (NK) cell activity was similar in all three groups. HM group had significantly higher numbers of NK cells (P&lt;0.05) than NUC at 2, 6, and 12 months and CON at 2, 7, and 12 months. Formula-fed infants had a higher percent CD4 cells at 2 months (NUC, CON&gt;HM; P&lt;0.005), 7 months (CON, NUC&gt;HM; P&lt;0.01), and 12 months (NUC&gt;HM; P&lt;0.05). The NK activity data are presented in Table VII. \n     \n                       TABLE VII                                                   \n______________________________________                                    \nNK ACTIVITY.sup.1                                                         \n        NUC         CON     HM                                            \n______________________________________                                    \n 2 months 11.2          8.0     9.0                                       \n 6 months 9.0           12.6    9.0                                       \n 7 months 13.9          14.3    13.0                                      \n12 months 19.4          21.3    21.4                                      \n______________________________________                                    \n .sup.1 Values are % target cells killed at effector:target ratio of 50:1.\n \n    \n     Part of the impetus for this study and evidence that different ratios and levels of nucleotides impact on different physiological parameters was the report by Carver et al. (Pediatrics 1991;88:359) that infants fed nucleotide-fortified SMA® (infant nutritional sold by Wyeth, Inc. believed to contain 21 mg CMP; 6.0 mg AMP; 6.0 mg UMP; 6.0 mg AMP and 3.0 mg IMP per liter of formula) had significantly higher NK activity than those fed unfortified SMA. The present study, using the formula according to the instant invention, shows no effect of nucleotides on NK activity at 2 months and in fact no difference, among any of the groups at any time. Given the small number of infants in the Carver study (42 degrees of freedom at 2 months) compared to this study (255 degrees of freedom at 2 months), it would seem likely the Carver data are an aberration due to small sample size or, the addition of nucleotides does not increase number of NK cells or, the types and levels of nucleotides used by Carver produced only a cellular response as opposed to the humoral response seen in this invention. \n     The anthropometric measurements indicate that growth was comparable among all infants in the study. The fact that even before controlling for birth values there were no differences among males for weight, length, or head circumference gives assurance that growth was acceptable among all groups. \n     The higher stool frequency and number of feedings per day of HM-fed infants compared to formula-fed infants during the first 2 months is well established. Softer stools of HM-fed infants are also common, although only the NUC group was different at 2 months and by a small amount. Overall, the measures of tolerance among all groups were very similar through 4 months when half the infants were still being exclusively breastfed. These data demonstrate both formulas were extremely well tolerated and are set forth in Table IX. \n     \n                       TABLE IX                                                    \n______________________________________                                    \nINTAKE AND TOLERANCE Mean (SEM).sup.1                                     \n______________________________________                                    \n2 months                                                                  \n             NUC     CON       HM                                         \n             100     107       103                                        \n______________________________________                                    \nFeedings (#/day)                                                          \n               6.2    (0.1)  6.4  (0.1)                                   \n                                       7.7  (0.2)                         \nIntake (mL/day)                                                           \n               831    (19)   823  (18) ND                                 \nSpit-up (% of feedings)                                                   \n               8      (2)    18   (2)  20   (2)                           \nStool Frequency (#/day)                                                   \n               1.6    (0.1)  1.4  (0.1)                                   \n                                       2.7  (0.2)                         \nStool Consistency.sup.2                                                   \n               2.0    (0.1)  1.9  (0.1)                                   \n                                       1.7  (0.1)                         \n______________________________________                                    \n4 months                                                                  \n             NUC     CON       HM                                         \n             98      107       103                                        \n______________________________________                                    \nFeedings (#/day)                                                          \n               5.9    (0.1)  6.0  (0.1)                                   \n                                       6.6  (0.2)                         \nIntake (mL/day)                                                           \n               987    (33)   926  (17) ND                                 \nSpit-up (% of feedings)                                                   \n               22     (2)    18   (2)  20   (2)                           \nStool Frequency (#/day)                                                   \n               1.4    (0.1)  1.4  (0.1)                                   \n                                       1.5  (0.1)                         \nStool Consistency.sup.2                                                   \n               2.0    (0.1)  2.1  (0.1)                                   \n                                       2.1  (0.1)                         \n______________________________________                                    \n .sup.1 Values in the same row with different superscripts are            \n significantly different; P &lt; 0.05.                                       \n .sup.2 Mean rank consistency, where 1 = watery, 2 = mushy, 3 = soft, 4 = \n formed, 5 = hard.                                                        \n \n    \n     The differential white counts and lymphocyte subset numbers of all infants receiving the formula according to this invention were well within normal ranges throughout the first year of life. \n     The vaccine response in this study was intended to be an immunological probe or indicator as to the responsiveness of the immune system in general. On the humoral side, tetanus toxoid vaccine was selected because it is a strong antigen, diphtheria toxoid was selected as a vaccine containing a weaker antigen, and Hib vaccine was selected as a very weak antigen that requires conjugation to a carrier protein to achieve a T-cell dependent immune response to the Hib polysaccharide component of the vaccine to be effective. It was thought that if nutritional intervention could evoke a difference in response that could be measured, that difference would more likely occur with the weaker antigens. While all infants would be expected to respond well to a strong antigen, like tetanus toxoid, a less vigorous response would be expected to a weak antigen. The Lederle Hib TITER® was selected specifically because the literature indicated that infants responded rather weakly after the first and second immunizations. Furthermore, the protein used as the conjugate in this vaccine, the CRM 197 protein (a non-toxic mutant diphtheria toxin), is antigenically very similar to diphtheria toxoid. Diphtheria toxoid vaccination also represents a response to a moderately weak antigen and correlates with immune response to the H. influenzae conjugate vaccine with the CRM 197 protein carrier. \n     The vaccine response at 6 months is taken from blood drawn immediately before the 6-month vaccination and represents the response 2 months after the second immunization given at 4 months of age. Already at that time point the Hib response was significantly higher in NUC than HM for both anti-Hib IgG and Hib Farr antibody. At 7 months, one month after the third immunization, NUC is significantly higher than CON and HM for Hib Farr. Hib IgG is higher at 7 months, and although there are not pairwise differences, the NUC group is double CON and HM (1.25 vs 0.63 and 0.60, respectively). The Hib Farr value was still significantly higher for NUCs at 12 months. For this weak antigen, a difference was first seen at 6 months. The difference became stronger at 7 months when the maximum response was expected and was maintained through 12 months of age. \n     In response to the moderately weak antigen diphtheria vaccine, there were no differences at 6 months, but at 7 months the NUC group was significantly higher than HM. By 12 months this difference was no longer present. For the moderately weak antigen, the direction of the present difference was the same as with the weak antigen (Hib) but was different only at the point of highest response. \n     For the strong antigen, tetanus, there were no differences among feeding groups at any time point. \n     These data strongly support the instant invention of specific nucleotide equivalents at specific levels and ratios to enhance the immune system. In this example and in commercial production of enteral formulas according to the invention, background levels of nucleotide equivalents are determined and then the formula would be supplemented with appropriate commodities, such as CMP, AMP, UMP and GMP, to the claimed levels and ratios. It should be remembered that by nucleotide equivalents is meant ribo-nucleotides, ribo-nucleosides, RNA, and ribo-nucleotide adjuncts, such as activated sugars. The sum of all these elements determine the total potentially available ribo-nucleotide equivalents. \n     Two additional pieces of data strongly support that the formula according to this invention provides an unexpected result. The number of subjects who have achieved protective levels of anti-Hib immunoglobulin as shown in Table VII is consistently 10% higher in the NUC group. The three-way comparison does not show a statistical difference. However, a two-way comparison between the NUC and CON formula groups at 7 months is significant (P&lt;0.05). An additional piece of data comes from two of the clinical sites which chose to collect morbidity data. As part of the study the incidence of diarrhea was determined at the two clinical study sites. Of 26 infants fed the NUC formula, only two reported diarrhea while 10 of 29 reported diarrhea in the CON formula. The χ 2  analysis comparing the incidence of diarrhea in infants fed the two formulas is significant (P&lt;0.05). In summary, the improved response to vaccination, the higher percent of subjects who have protective levels of antibodies, and the reduced incidence of diarrhea show that infants consuming the nucleotide-fortified formula according to this invention achieve enhanced immunological development as compared to those consuming the control formula. \n     INDUSTRIAL APPLICABILITY \n     The results from these experiments demonstrate that the enteral formula of this invention is effective in enhancing the immune system and treating diarrhea. The medical community is constantly searching for nutritional formulas that will benefit the infant. The present invention can clearly fill that need. The nucleotide equivalent level of the formula in the study is about the minimum for efficacious effect. Additionally, the formula is nutritionally complete as an infant formula. The manufacture of the formula utilizes conventional equipment and may be readily accomplished. \n     While the infant formula and method of making said formula herein described constitute a preferred embodiment of this invention, it is to be understood that the invention is not limited to this precise formulation or method and that changes may be made therein without departing from the scope of the invention which is defined in the appended claims.'

In [30]:
# bag of words for description_text
vectorizer = TfidfVectorizer(max_features=1000)  # Adjust 'max_features' as needed
X_train_de = encode_text_colum(X_train_ab, 'description_text', vectorizer)
X_test_de = encode_text_colum(X_test_ab, 'description_text', vectorizer)

AttributeError: 'int' object has no attribute 'lower'

In [None]:

df_no_missing = df_columns_dropped.fillna(df_columns_dropped.mean()).copy()

# extracting what we'll try to predict
y = df_no_missing['commercialized']
df_no_missing.drop('commercialized', axis= 1, inplace=True)



# dropping columns where all the value are the same (min = max) they would be zero if I apply min max rescaling
min_eq_max = df_no_missing.columns[df_no_missing.min() == df_no_missing.max()].to_list()
df_clean = df_no_missing.drop(min_eq_max, axis=1)


In [None]:
# split the data
X_train, X_test, y_train, y_test = train_test_split(df_clean, y, test_size=0.20, random_state=42)

#rescale 
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# bag of words for abstract
vectorizer = TfidfVectorizer(max_features=1000)  # Adjust 'max_features' as needed
X_train_ab = encode_text_colum(X_train, 'abstract', vectorizer)
X_test_ab = encode_text_colum(X_test, 'abstract', vectorizer)

# bag of words for description_text
vectorizer = TfidfVectorizer(max_features=1000)  # Adjust 'max_features' as needed
X_train_de = encode_text_colum(X_train_ab, 'description_text', vectorizer)
X_test_de = encode_text_colum(X_test_ab, 'description_text', vectorizer)


# Naive Bayes Model

In [60]:
# bag of words for description_text
vectorizer = TfidfVectorizer(max_features=1000)  # Adjust 'max_features' as needed
X_train_de = encode_text_colum(X_train_de, 'description_text', vectorizer)
X_test_de = encode_text_colum(X_test_de, 'description_text', vectorizer)

# Convert feature names to strings
X_train_de.columns = X_train_de.columns.astype(str)
X_test_de.columns = X_test_de.columns.astype(str)

# Train Naive Bayes model
naive_bayes_model = MultinomialNB()
naive_bayes_model.fit(X_train_de, y_train)

# Make predictions on the test set
predictions = naive_bayes_model.predict(X_test_de)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)

# Display the results
print(f"Naive Bayes Accuracy: {accuracy:.2f}")
print("Naive Bayes Classification Report:")
print(report)


NameError: name 'X_train_de' is not defined

In [8]:
# bag of words for description_text
vectorizer = TfidfVectorizer(max_features=1000)  # Adjust 'max_features' as needed
X_train_de = encode_text_colum(X_train_ab, 'description_text', vectorizer)
X_test_de = encode_text_colum(X_test_ab, 'description_text', vectorizer)

# Convert feature names to strings
X_train_de.columns = X_train_de.columns.astype(str)
X_test_de.columns = X_test_de.columns.astype(str)

# Train Naive Bayes model
naive_bayes_model = MultinomialNB()
naive_bayes_model.fit(X_train_de, y_train)

# Make predictions on the test set
predictions = naive_bayes_model.predict(X_test_de)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)

# Display the results
print(f"Naive Bayes Accuracy: {accuracy:.2f}")
print("Naive Bayes Classification Report:")
print(report)


KeyError: 'description_text'